xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 3cfa4ea460eed5c3d3ab4210924c568cabc7705a)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size;
76       cum2 += subset_size*n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
112     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125                       nv+cum,&pcbddc->adaptive_constraints_idxs,
126                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
127   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129   maxneigs = 0;
130   cum = cum2 = cumarray = 0;
131   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132   pcbddc->adaptive_constraints_data_ptr[0] = 0;
133   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
134     const PetscInt *idxs;
135 
136     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
137     for (cum=0;cum<nv;cum++) {
138       pcbddc->adaptive_constraints_n[cum] = 1;
139       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140       pcbddc->adaptive_constraints_data[cum] = 1.0;
141       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143     }
144     cum2 = cum;
145     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
146   }
147 
148   if (mss) { /* multilevel */
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
150     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
151   }
152 
153   for (i=0;i<sub_schurs->n_subs;i++) {
154     PetscInt j,subset_size;
155 
156     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
157     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
158       const PetscInt *idxs;
159       PetscReal      infty = PETSC_MAX_REAL;
160       PetscInt       eigs_start = 0;
161       PetscBLASInt   B_N;
162       PetscBool      same_data = PETSC_FALSE;
163 
164       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
165       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
166         if (sub_schurs->is_hermitian) {
167           PetscInt j,k;
168           if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
169             ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
170             ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
171           }
172           for (j=0;j<subset_size;j++) {
173             for (k=j;k<subset_size;k++) {
174               S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
175               St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
176             }
177           }
178         } else {
179           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
180           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
181         }
182       } else {
183         S = Sarray + cumarray;
184         St = Starray + cumarray;
185       }
186 
187       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
188       /* see if we can save some work */
189       if (sub_schurs->n_subs == 1) {
190         ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
191       }
192 
193       if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
194         B_neigs = 0;
195       } else {
196         /* Threshold: this is an heuristic for edges */
197         thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
198 
199         if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
200           PetscBLASInt B_itype = 1;
201           PetscBLASInt B_IL, B_IU;
202           PetscReal    eps = -1.0; /* dlamch? */
203           PetscInt     nmin_s;
204 
205           /* ask for eigenvalues larger than thresh */
206           if (pcbddc->dbg_flag) {
207             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
208           }
209           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
210 #if defined(PETSC_USE_COMPLEX)
211           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
212 #else
213           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
214 #endif
215           ierr = PetscFPTrapPop();CHKERRQ(ierr);
216           if (B_ierr) {
217             if (B_ierr < 0 ) {
218               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
219             } else if (B_ierr <= B_N) {
220               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
221             } else {
222               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
223             }
224           }
225 
226           if (B_neigs > nmax) {
227             if (pcbddc->dbg_flag) {
228               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
229             }
230             eigs_start = B_neigs -nmax;
231             B_neigs = nmax;
232           }
233 
234           nmin_s = PetscMin(nmin,B_N);
235           if (B_neigs < nmin_s) {
236             PetscBLASInt B_neigs2;
237 
238             B_IU = B_N - B_neigs;
239             B_IL = B_N - nmin_s + 1;
240             if (pcbddc->dbg_flag) {
241               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
242             }
243             if (sub_schurs->is_hermitian) {
244               PetscInt j;
245               for (j=0;j<subset_size;j++) {
246                 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
247               }
248               for (j=0;j<subset_size;j++) {
249                 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
250               }
251             } else {
252               ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
253               ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
254             }
255             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
256 #if defined(PETSC_USE_COMPLEX)
257             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
258 #else
259             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
260 #endif
261             ierr = PetscFPTrapPop();CHKERRQ(ierr);
262             B_neigs += B_neigs2;
263           }
264           if (B_ierr) {
265             if (B_ierr < 0 ) {
266               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
267             } else if (B_ierr <= B_N) {
268               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
269             } else {
270               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
271             }
272           }
273           if (pcbddc->dbg_flag) {
274             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
275             for (j=0;j<B_neigs;j++) {
276               if (eigs[j] == 0.0) {
277                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
278               } else {
279                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
280               }
281             }
282           }
283         } else {
284             /* TODO */
285         }
286       }
287       maxneigs = PetscMax(B_neigs,maxneigs);
288       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
289       if (B_neigs) {
290         ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
291 
292         if (pcbddc->dbg_flag > 1) {
293           PetscInt ii;
294           for (ii=0;ii<B_neigs;ii++) {
295             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
296             for (j=0;j<B_N;j++) {
297               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
298             }
299           }
300         }
301 #if 0
302         for (j=0;j<B_neigs;j++) {
303           PetscBLASInt Blas_N,Blas_one = 1.0;
304           PetscScalar norm;
305           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
306           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
307                                                      &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
308           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
309             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
310           } else {
311             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
312           }
313           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
314         }
315 #endif
316         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
317         pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
318         pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
319         cum++;
320       }
321       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
322     }
323     /* shift for next computation */
324     cumarray += subset_size*subset_size;
325   }
326   if (pcbddc->dbg_flag) {
327     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
328   }
329 
330   if (mss) {
331     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
332     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
333     /* destroy matrices (junk) */
334     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
335     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
336   }
337   if (allocated_S_St) {
338     ierr = PetscFree2(S,St);CHKERRQ(ierr);
339   }
340   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
341 #if defined(PETSC_USE_COMPLEX)
342   ierr = PetscFree(rwork);CHKERRQ(ierr);
343 #endif
344   if (pcbddc->dbg_flag) {
345     PetscInt maxneigs_r;
346     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
347     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
348   }
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "PCBDDCSetUpSolvers"
354 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
355 {
356   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
357   PetscScalar    *coarse_submat_vals;
358   PetscErrorCode ierr;
359 
360   PetscFunctionBegin;
361   /* Setup local scatters R_to_B and (optionally) R_to_D */
362   /* PCBDDCSetUpLocalWorkVectors should be called first! */
363   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
364 
365   /* Setup local neumann solver ksp_R */
366   /* PCBDDCSetUpLocalScatters should be called first! */
367   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
368 
369   /* Change global null space passed in by the user if change of basis has been requested */
370   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
371     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
372   }
373 
374   /*
375      Setup local correction and local part of coarse basis.
376      Gives back the dense local part of the coarse matrix in column major ordering
377   */
378   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
379 
380   /* Compute total number of coarse nodes and setup coarse solver */
381   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
382 
383   /* free */
384   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "PCBDDCResetCustomization"
390 PetscErrorCode PCBDDCResetCustomization(PC pc)
391 {
392   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
393   PetscErrorCode ierr;
394 
395   PetscFunctionBegin;
396   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
397   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
398   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
399   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
400   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
401   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
402   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
403   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
404   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
405   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
406   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "PCBDDCResetTopography"
412 PetscErrorCode PCBDDCResetTopography(PC pc)
413 {
414   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
415   PetscErrorCode ierr;
416 
417   PetscFunctionBegin;
418   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
419   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
420   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
421   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
422   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "PCBDDCResetSolvers"
428 PetscErrorCode PCBDDCResetSolvers(PC pc)
429 {
430   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
431   PetscScalar    *array;
432   PetscErrorCode ierr;
433 
434   PetscFunctionBegin;
435   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
436   if (pcbddc->coarse_phi_B) {
437     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
438     ierr = PetscFree(array);CHKERRQ(ierr);
439   }
440   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
441   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
442   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
443   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
444   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
445   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
446   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
447   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
448   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
449   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
450   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
451   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
452   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
453   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
454   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
455   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
456   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
457   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
458   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
459   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
460   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
461   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
462   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
463   PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
468 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
469 {
470   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
471   PC_IS          *pcis = (PC_IS*)pc->data;
472   VecType        impVecType;
473   PetscInt       n_constraints,n_R,old_size;
474   PetscErrorCode ierr;
475 
476   PetscFunctionBegin;
477   if (!pcbddc->ConstraintMatrix) {
478     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
479   }
480   /* get sizes */
481   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
482   n_R = pcis->n-pcbddc->n_vertices;
483   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
484   /* local work vectors (try to avoid unneeded work)*/
485   /* R nodes */
486   old_size = -1;
487   if (pcbddc->vec1_R) {
488     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
489   }
490   if (n_R != old_size) {
491     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
492     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
493     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
494     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
495     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
496     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
497   }
498   /* local primal dofs */
499   old_size = -1;
500   if (pcbddc->vec1_P) {
501     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
502   }
503   if (pcbddc->local_primal_size != old_size) {
504     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
505     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
506     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
507     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
508   }
509   /* local explicit constraints */
510   old_size = -1;
511   if (pcbddc->vec1_C) {
512     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
513   }
514   if (n_constraints && n_constraints != old_size) {
515     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
516     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
517     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
518     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
519   }
520   PetscFunctionReturn(0);
521 }
522 
523 #undef __FUNCT__
524 #define __FUNCT__ "PCBDDCSetUpCorrection"
525 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
526 {
527   PetscErrorCode  ierr;
528   /* pointers to pcis and pcbddc */
529   PC_IS*          pcis = (PC_IS*)pc->data;
530   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
531   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
532   /* submatrices of local problem */
533   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
534   /* submatrices of local coarse problem */
535   Mat             S_VV,S_CV,S_VC,S_CC;
536   /* working matrices */
537   Mat             C_CR;
538   /* additional working stuff */
539   PC              pc_R;
540   Mat             F;
541   PetscBool       isLU,isCHOL,isILU;
542 
543   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
544   PetscScalar     *work;
545   PetscInt        *idx_V_B;
546   PetscInt        n,n_vertices,n_constraints;
547   PetscInt        i,n_R,n_D,n_B;
548   PetscBool       unsymmetric_check;
549   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
550   MatType         impMatType;
551   /* some shortcuts to scalars */
552   PetscScalar     one=1.0,m_one=-1.0;
553 
554   PetscFunctionBegin;
555   n_vertices = pcbddc->n_vertices;
556   n_constraints = pcbddc->local_primal_size-n_vertices;
557   /* Set Non-overlapping dimensions */
558   n_B = pcis->n_B;
559   n_D = pcis->n - n_B;
560   n_R = pcis->n - n_vertices;
561 
562   /* Set types for local objects needed by BDDC precondtioner */
563   impMatType = MATSEQDENSE;
564 
565   /* vertices in boundary numbering */
566   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
567   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
568   if (i != n_vertices) {
569     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
570   }
571 
572   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
573   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
574   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
575   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
576   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
577   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
578   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
579   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
580   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
581   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
582 
583   unsymmetric_check = PETSC_FALSE;
584   /* allocate workspace */
585   n = 0;
586   if (n_constraints) {
587     n += n_R*n_constraints;
588   }
589   if (n_vertices) {
590     n = PetscMax(2*n_R*n_vertices,n);
591     n = PetscMax((n_R+n_B)*n_vertices,n);
592   }
593   if (!pcbddc->symmetric_primal) {
594     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
595     unsymmetric_check = PETSC_TRUE;
596   }
597   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
598 
599   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
600   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
601   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
602   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
603   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
604   if (isLU || isILU || isCHOL) {
605     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
606   } else if (sub_schurs->reuse_mumps) {
607     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
608     MatFactorType type;
609 
610     F = reuse_mumps->F;
611     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
612     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
613   } else {
614     F = NULL;
615   }
616 
617   /* Precompute stuffs needed for preprocessing and application of BDDC*/
618   if (n_constraints) {
619     Mat         M1,M2,M3;
620     Mat         auxmat;
621     IS          is_aux;
622     PetscScalar *array,*array2;
623 
624     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
625     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
626 
627     /* Extract constraints on R nodes: C_{CR}  */
628     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
629     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
630     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
631 
632     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
633     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
634     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
635     for (i=0;i<n_constraints;i++) {
636       const PetscScalar *row_cmat_values;
637       const PetscInt    *row_cmat_indices;
638       PetscInt          size_of_constraint,j;
639 
640       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
641       for (j=0;j<size_of_constraint;j++) {
642         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
643       }
644       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
645     }
646     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
647     if (F) {
648       Mat B;
649 
650       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
651       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
652       ierr = MatDestroy(&B);CHKERRQ(ierr);
653     } else {
654       PetscScalar *marr;
655 
656       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
657       for (i=0;i<n_constraints;i++) {
658         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
659         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
660         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
661         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
662         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
663       }
664       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
665     }
666     if (!pcbddc->switch_static) {
667       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
668       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
669       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
670       for (i=0;i<n_constraints;i++) {
671         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
672         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
673         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
674         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
675         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
676         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
677       }
678       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
679       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
680       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
681     } else {
682       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
683       pcbddc->local_auxmat2 = local_auxmat2_R;
684       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
685     }
686     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
687     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
688     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
689     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
690     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
691     if (isCHOL) {
692       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
693     } else {
694       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
695     }
696     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
697     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
698     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
699     ierr = MatDestroy(&M2);CHKERRQ(ierr);
700     ierr = MatDestroy(&M3);CHKERRQ(ierr);
701     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
702     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
703     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
704     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
705     ierr = MatDestroy(&M1);CHKERRQ(ierr);
706   }
707   /* Get submatrices from subdomain matrix */
708   if (n_vertices) {
709     Mat       newmat;
710     IS        is_aux;
711     PetscInt  ibs,mbs;
712     PetscBool issbaij;
713 
714     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
715       IS tis;
716 
717       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
718       ierr = ISSort(tis);CHKERRQ(ierr);
719       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
720       ierr = ISDestroy(&tis);CHKERRQ(ierr);
721     } else {
722       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
723     }
724     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
725     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
726     if (ibs != mbs) { /* need to convert to SEQAIJ */
727       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
728       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
729       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
730       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
731       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
732       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
733     } else {
734       /* this is safe */
735       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
736       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
737       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
738         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
739         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
740         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
741         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
742         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
743         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
744       } else {
745         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
746         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
747         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
748       }
749       /* need to convert to SeqAIJ for MatMatMult with dense matrix */
750       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
751     }
752     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
753   }
754 
755   /* Matrix of coarse basis functions (local) */
756   if (pcbddc->coarse_phi_B) {
757     PetscInt on_B,on_primal,on_D=n_D;
758     if (pcbddc->coarse_phi_D) {
759       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
760     }
761     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
762     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
763       PetscScalar *marray;
764 
765       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
766       ierr = PetscFree(marray);CHKERRQ(ierr);
767       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
768       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
769       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
770       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
771     }
772   }
773 
774   if (!pcbddc->coarse_phi_B) {
775     PetscScalar *marray;
776 
777     n = n_B*pcbddc->local_primal_size;
778     if (pcbddc->switch_static || pcbddc->dbg_flag) {
779       n += n_D*pcbddc->local_primal_size;
780     }
781     if (!pcbddc->symmetric_primal) {
782       n *= 2;
783     }
784     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
785     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
786     n = n_B*pcbddc->local_primal_size;
787     if (pcbddc->switch_static || pcbddc->dbg_flag) {
788       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
789       n += n_D*pcbddc->local_primal_size;
790     }
791     if (!pcbddc->symmetric_primal) {
792       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
793       if (pcbddc->switch_static || pcbddc->dbg_flag) {
794         n = n_B*pcbddc->local_primal_size;
795         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
796       }
797     } else {
798       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
799       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
800       if (pcbddc->switch_static || pcbddc->dbg_flag) {
801         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
802         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
803       }
804     }
805   }
806   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
807   /* vertices */
808   if (n_vertices) {
809 
810     if (n_R) {
811       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
812       PetscBLASInt B_N,B_one = 1;
813       PetscScalar  *x,*y;
814 
815       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
816       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
817       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
818       if (F) { /* TODO could be optimized for symmetric problems */
819         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
820       } else {
821         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
822         for (i=0;i<n_vertices;i++) {
823           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
824           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
825           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
826           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
827           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
828         }
829         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
830       }
831       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
832       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
833       if (n_constraints) {
834         Mat B;
835 
836         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
837         for (i=0;i<n_vertices;i++) {
838           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
839           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
840           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
841           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
842           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
843           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
844         }
845         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
846         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
847         ierr = MatDestroy(&B);CHKERRQ(ierr);
848         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
849         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
850         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
851         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
852         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
853         ierr = MatDestroy(&B);CHKERRQ(ierr);
854       }
855       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
856       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
857       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
858       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
859       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
860       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
861       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
862       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
863       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
864       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
865     } else {
866       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
867     }
868     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
869     /* coarse basis functions */
870     for (i=0;i<n_vertices;i++) {
871       PetscScalar *y;
872 
873       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
874       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
875       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
876       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878       y[n_B*i+idx_V_B[i]] = 1.0;
879       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
880       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
881 
882       if (pcbddc->switch_static || pcbddc->dbg_flag) {
883         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
884         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
885         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
888         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
889       }
890       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
891     }
892   }
893 
894   if (n_constraints) {
895     Mat B;
896 
897     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
898     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
899     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
900     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
901     if (n_vertices) {
902       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
903         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
904       } else {
905         Mat S_VCt;
906 
907         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
908         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
909         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
910       }
911     }
912     ierr = MatDestroy(&B);CHKERRQ(ierr);
913     /* coarse basis functions */
914     for (i=0;i<n_constraints;i++) {
915       PetscScalar *y;
916 
917       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
918       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
919       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
920       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
922       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
923       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
924       if (pcbddc->switch_static || pcbddc->dbg_flag) {
925         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
926         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
927         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
929         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
930         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
931       }
932       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
933     }
934   }
935   if (n_constraints) {
936     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
937   }
938 
939   /* compute other basis functions for non-symmetric problems */
940   if (!pcbddc->symmetric_primal) {
941 
942     if (n_constraints) {
943       Mat S_CCT,B_C;
944 
945       /* this is a lazy thing */
946       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
947       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
948       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
949       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
950       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
951       if (n_vertices) {
952         Mat B_V,S_VCT;
953 
954         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
955         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
956         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
957         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
958         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
959       }
960       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
961     } else { /* if there are no constraints, reset work */
962       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
963     }
964     if (n_vertices && n_R) {
965       Mat          A_VRT;
966       PetscScalar  *marray;
967       PetscBLASInt B_N,B_one = 1;
968 
969       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
970       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
971       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
972       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
973       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
974       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
975       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
976     }
977 
978     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
979       for (i=0;i<pcbddc->local_primal_size;i++) {
980         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
981         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
982         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
983         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
984         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
985       }
986     } else {
987       for (i=0;i<pcbddc->local_primal_size;i++) {
988         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
989         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
990         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
991         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
992         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
993       }
994     }
995     /* coarse basis functions */
996     for (i=0;i<pcbddc->local_primal_size;i++) {
997       PetscScalar *y;
998 
999       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1000       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1001       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1002       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1003       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004       if (i<n_vertices) {
1005         y[n_B*i+idx_V_B[i]] = 1.0;
1006       }
1007       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1008       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1009 
1010       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1011         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1012         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1013         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1015         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1016         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1017       }
1018       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1019     }
1020   }
1021   /* free memory */
1022   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1023   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1024   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1025   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1026   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1027   ierr = PetscFree(work);CHKERRQ(ierr);
1028   if (n_vertices) {
1029     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1030   }
1031   if (n_constraints) {
1032     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1033   }
1034   /* Checking coarse_sub_mat and coarse basis functios */
1035   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1036   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1037   if (pcbddc->dbg_flag) {
1038     Mat         coarse_sub_mat;
1039     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1040     Mat         coarse_phi_D,coarse_phi_B;
1041     Mat         coarse_psi_D,coarse_psi_B;
1042     Mat         A_II,A_BB,A_IB,A_BI;
1043     Mat         C_B,CPHI;
1044     IS          is_dummy;
1045     Vec         mones;
1046     MatType     checkmattype=MATSEQAIJ;
1047     PetscReal   real_value;
1048 
1049     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1050     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1051     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1052     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1053     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1054     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1055     if (unsymmetric_check) {
1056       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1057       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1058     }
1059     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1060 
1061     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1062     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1063     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1064     if (unsymmetric_check) {
1065       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1066       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1067       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1068       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1069       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1070       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1071       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1072       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1073       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1074       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1075       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1076       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1077     } else {
1078       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1079       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1080       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1081       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1082       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1083       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1084       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1085       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1086     }
1087     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1088     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1089     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1090     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1091     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1092     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1093     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1094     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1095 
1096     /* check constraints */
1097     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1098     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1099     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1100     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1101     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1102     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1103     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1104     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1105     if (unsymmetric_check) {
1106       ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1107       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1108       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1109       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1110       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1111     }
1112     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1113     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1114     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1115     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1116 
1117     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1118     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1119     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1120     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1121     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1122     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1123     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1124     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1125     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1126     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1127     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1128     if (unsymmetric_check) {
1129       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1130       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1131     }
1132     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1133   }
1134   /* get back data */
1135   *coarse_submat_vals_n = coarse_submat_vals;
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1141 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1142 {
1143   Mat            *work_mat;
1144   IS             isrow_s,iscol_s;
1145   PetscBool      rsorted,csorted;
1146   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1147   PetscErrorCode ierr;
1148 
1149   PetscFunctionBegin;
1150   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1151   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1152   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1153   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1154 
1155   if (!rsorted) {
1156     const PetscInt *idxs;
1157     PetscInt *idxs_sorted,i;
1158 
1159     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1160     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1161     for (i=0;i<rsize;i++) {
1162       idxs_perm_r[i] = i;
1163     }
1164     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1165     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1166     for (i=0;i<rsize;i++) {
1167       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1168     }
1169     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1170     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1171   } else {
1172     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1173     isrow_s = isrow;
1174   }
1175 
1176   if (!csorted) {
1177     if (isrow == iscol) {
1178       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1179       iscol_s = isrow_s;
1180     } else {
1181       const PetscInt *idxs;
1182       PetscInt *idxs_sorted,i;
1183 
1184       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1185       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1186       for (i=0;i<csize;i++) {
1187         idxs_perm_c[i] = i;
1188       }
1189       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1190       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1191       for (i=0;i<csize;i++) {
1192         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1193       }
1194       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1195       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1196     }
1197   } else {
1198     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1199     iscol_s = iscol;
1200   }
1201 
1202   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1203 
1204   if (!rsorted || !csorted) {
1205     Mat      new_mat;
1206     IS       is_perm_r,is_perm_c;
1207 
1208     if (!rsorted) {
1209       PetscInt *idxs_r,i;
1210       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1211       for (i=0;i<rsize;i++) {
1212         idxs_r[idxs_perm_r[i]] = i;
1213       }
1214       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1215       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1216     } else {
1217       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1218     }
1219     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1220 
1221     if (!csorted) {
1222       if (isrow_s == iscol_s) {
1223         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1224         is_perm_c = is_perm_r;
1225       } else {
1226         PetscInt *idxs_c,i;
1227         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1228         for (i=0;i<csize;i++) {
1229           idxs_c[idxs_perm_c[i]] = i;
1230         }
1231         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1232         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1233       }
1234     } else {
1235       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1236     }
1237     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1238 
1239     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1240     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1241     work_mat[0] = new_mat;
1242     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1243     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1244   }
1245 
1246   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1247   *B = work_mat[0];
1248   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1249   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1250   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1256 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1257 {
1258   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1259   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1260   Mat            new_mat;
1261   IS             is_local,is_global;
1262   PetscInt       local_size;
1263   PetscBool      isseqaij;
1264   PetscErrorCode ierr;
1265 
1266   PetscFunctionBegin;
1267   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1268   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1269   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1270   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1271   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1272   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1273   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1274 
1275   /* check */
1276   if (pcbddc->dbg_flag) {
1277     Vec       x,x_change;
1278     PetscReal error;
1279 
1280     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1281     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1282     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1283     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1285     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1286     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1287     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1288     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1289     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1290     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1291     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1292     ierr = VecDestroy(&x);CHKERRQ(ierr);
1293     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1294   }
1295 
1296   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1297   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1298   if (isseqaij) {
1299     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1300   } else {
1301     Mat work_mat;
1302     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1303     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1304     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1305   }
1306   if (matis->A->symmetric_set) {
1307     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1308 #if !defined(PETSC_USE_COMPLEX)
1309     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1310 #endif
1311   }
1312   /*
1313   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1314   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1315   */
1316   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1322 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1323 {
1324   PC_IS*          pcis = (PC_IS*)(pc->data);
1325   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1326   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1327   PetscInt        *idx_R_local=NULL;
1328   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1329   PetscInt        vbs,bs;
1330   PetscBT         bitmask=NULL;
1331   PetscErrorCode  ierr;
1332 
1333   PetscFunctionBegin;
1334   /*
1335     No need to setup local scatters if
1336       - primal space is unchanged
1337         AND
1338       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1339         AND
1340       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1341   */
1342   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1343     PetscFunctionReturn(0);
1344   }
1345   /* destroy old objects */
1346   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1347   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1348   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1349   /* Set Non-overlapping dimensions */
1350   n_B = pcis->n_B;
1351   n_D = pcis->n - n_B;
1352   n_vertices = pcbddc->n_vertices;
1353 
1354   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1355 
1356   /* create auxiliary bitmask and allocate workspace */
1357   if (!sub_schurs->reuse_mumps) {
1358     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1359     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1360     for (i=0;i<n_vertices;i++) {
1361       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1362     }
1363 
1364     for (i=0, n_R=0; i<pcis->n; i++) {
1365       if (!PetscBTLookup(bitmask,i)) {
1366         idx_R_local[n_R++] = i;
1367       }
1368     }
1369   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1370     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1371 
1372     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1373     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1374   }
1375 
1376   /* Block code */
1377   vbs = 1;
1378   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1379   if (bs>1 && !(n_vertices%bs)) {
1380     PetscBool is_blocked = PETSC_TRUE;
1381     PetscInt  *vary;
1382     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1383     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1384     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1385     for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1386     for (i=0; i<n_vertices/bs; i++) {
1387       if (vary[i]!=0 && vary[i]!=bs) {
1388         is_blocked = PETSC_FALSE;
1389         break;
1390       }
1391     }
1392     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1393       vbs = bs;
1394       for (i=0;i<n_R/vbs;i++) {
1395         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1396       }
1397     }
1398     ierr = PetscFree(vary);CHKERRQ(ierr);
1399   }
1400   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1401   if (sub_schurs->reuse_mumps) {
1402     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1403 
1404     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1405     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1406     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1407     reuse_mumps->is_R = pcbddc->is_R_local;
1408   } else {
1409     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1410   }
1411 
1412   /* print some info if requested */
1413   if (pcbddc->dbg_flag) {
1414     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1415     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1416     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1417     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1418     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1419     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1420     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1421   }
1422 
1423   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1424   if (!sub_schurs->reuse_mumps) {
1425     IS       is_aux1,is_aux2;
1426     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1427 
1428     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1429     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1430     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1431     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1432     for (i=0; i<n_D; i++) {
1433       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1434     }
1435     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1436     for (i=0, j=0; i<n_R; i++) {
1437       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1438         aux_array1[j++] = i;
1439       }
1440     }
1441     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1442     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1443     for (i=0, j=0; i<n_B; i++) {
1444       if (!PetscBTLookup(bitmask,is_indices[i])) {
1445         aux_array2[j++] = i;
1446       }
1447     }
1448     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1449     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1450     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1451     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1452     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1453 
1454     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1455       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1456       for (i=0, j=0; i<n_R; i++) {
1457         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1458           aux_array1[j++] = i;
1459         }
1460       }
1461       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1462       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1463       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1464     }
1465     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1466     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1467   } else {
1468     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1469     IS               tis;
1470     PetscInt         schur_size;
1471 
1472     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1473     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1474     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1475     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1476     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1477       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1478       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1479       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1480     }
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1488 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1489 {
1490   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1491   PC_IS          *pcis = (PC_IS*)pc->data;
1492   PC             pc_temp;
1493   Mat            A_RR;
1494   MatReuse       reuse;
1495   PetscScalar    m_one = -1.0;
1496   PetscReal      value;
1497   PetscInt       n_D,n_R,ibs,mbs;
1498   PetscBool      use_exact,use_exact_reduced,issbaij;
1499   PetscErrorCode ierr;
1500   /* prefixes stuff */
1501   char           dir_prefix[256],neu_prefix[256],str_level[16];
1502   size_t         len;
1503 
1504   PetscFunctionBegin;
1505 
1506   /* compute prefixes */
1507   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1508   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1509   if (!pcbddc->current_level) {
1510     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1511     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1512     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1513     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1514   } else {
1515     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1516     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1517     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1518     len -= 15; /* remove "pc_bddc_coarse_" */
1519     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1520     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1521     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1522     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1523     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1524     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1525     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1526     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1527   }
1528 
1529   /* DIRICHLET PROBLEM */
1530   if (dirichlet) {
1531     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1532     if (pcbddc->local_mat->symmetric_set) {
1533       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1534     }
1535     /* Matrix for Dirichlet problem is pcis->A_II */
1536     n_D = pcis->n - pcis->n_B;
1537     if (!pcbddc->ksp_D) { /* create object if not yet build */
1538       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1539       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1540       /* default */
1541       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1542       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1543       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1544       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1545       if (issbaij) {
1546         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1547       } else {
1548         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1549       }
1550       /* Allow user's customization */
1551       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1552       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1553     }
1554     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1555     if (sub_schurs->reuse_mumps) {
1556       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1557 
1558       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1559     }
1560     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1561     if (!n_D) {
1562       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1563       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1564     }
1565     /* Set Up KSP for Dirichlet problem of BDDC */
1566     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1567     /* set ksp_D into pcis data */
1568     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1569     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1570     pcis->ksp_D = pcbddc->ksp_D;
1571   }
1572 
1573   /* NEUMANN PROBLEM */
1574   A_RR = 0;
1575   if (neumann) {
1576     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1577     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1578     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1579     if (pcbddc->ksp_R) { /* already created ksp */
1580       PetscInt nn_R;
1581       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1582       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1583       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1584       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1585         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1586         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1587         reuse = MAT_INITIAL_MATRIX;
1588       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1589         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1590           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1591           reuse = MAT_INITIAL_MATRIX;
1592         } else { /* safe to reuse the matrix */
1593           reuse = MAT_REUSE_MATRIX;
1594         }
1595       }
1596       /* last check */
1597       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1598         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1599         reuse = MAT_INITIAL_MATRIX;
1600       }
1601     } else { /* first time, so we need to create the matrix */
1602       reuse = MAT_INITIAL_MATRIX;
1603     }
1604     /* extract A_RR */
1605     if (!sub_schurs->reuse_mumps) {
1606       ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1607       ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1608       if (ibs != mbs) {
1609         Mat newmat;
1610         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1611         ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1612         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1613       } else {
1614         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1615       }
1616       if (pcbddc->local_mat->symmetric_set) {
1617         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1618       }
1619     } else {
1620       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1621 
1622       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1623       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1624       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1625     }
1626     if (!pcbddc->ksp_R) { /* create object if not present */
1627       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1628       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1629       /* default */
1630       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1631       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1632       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1633       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1634       if (issbaij) {
1635         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1636       } else {
1637         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1638       }
1639       /* Allow user's customization */
1640       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1641       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1642     }
1643     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1644     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1645     if (!n_R) {
1646       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1647       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1648     }
1649     /* Reuse MUMPS solver if it is present */
1650     if (sub_schurs->reuse_mumps) {
1651       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1652 
1653       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1654     }
1655     /* Set Up KSP for Neumann problem of BDDC */
1656     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1657   }
1658   /* free Neumann problem's matrix */
1659   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1660 
1661   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1662   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1663     if (pcbddc->dbg_flag) {
1664       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1665       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1666       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1667     }
1668     if (dirichlet) { /* Dirichlet */
1669       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1670       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1671       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1672       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1673       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1674       /* need to be adapted? */
1675       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1676       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1677       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1678       /* print info */
1679       if (pcbddc->dbg_flag) {
1680         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1681         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1682       }
1683       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1684         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1685       }
1686     }
1687     if (neumann) { /* Neumann */
1688       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1689       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1690       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1691       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1692       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1693       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1694       /* need to be adapted? */
1695       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1696       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1697       /* print info */
1698       if (pcbddc->dbg_flag) {
1699         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1700         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1701       }
1702       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1703         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1704       }
1705     }
1706   }
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1712 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1713 {
1714   PetscErrorCode  ierr;
1715   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1716   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1717 
1718   PetscFunctionBegin;
1719   if (!sub_schurs->reuse_mumps) {
1720     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1721   }
1722   if (!pcbddc->switch_static) {
1723     if (applytranspose && pcbddc->local_auxmat1) {
1724       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1725       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1726     }
1727     if (!sub_schurs->reuse_mumps) {
1728       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1729       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1730     } else {
1731       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1732 
1733       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1734       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1735     }
1736   } else {
1737     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1738     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1739     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1740     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1741     if (applytranspose && pcbddc->local_auxmat1) {
1742       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1743       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1744       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1745       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1746     }
1747   }
1748   if (!sub_schurs->reuse_mumps) {
1749     if (applytranspose) {
1750       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1751     } else {
1752       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1753     }
1754 #if defined(PETSC_HAVE_MUMPS)
1755   } else {
1756     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1757 
1758     if (applytranspose) {
1759       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1760     } else {
1761       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1762     }
1763 #endif
1764   }
1765   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1766   if (!pcbddc->switch_static) {
1767     if (!sub_schurs->reuse_mumps) {
1768       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1769       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1770     } else {
1771       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1772 
1773       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1774       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1775     }
1776     if (!applytranspose && pcbddc->local_auxmat1) {
1777       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1778       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1779     }
1780   } else {
1781     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1782     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1783     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1784     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1785     if (!applytranspose && pcbddc->local_auxmat1) {
1786       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1787       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1788     }
1789     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1790     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1791     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1792     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1793   }
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1798 #undef __FUNCT__
1799 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1800 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1801 {
1802   PetscErrorCode ierr;
1803   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1804   PC_IS*            pcis = (PC_IS*)  (pc->data);
1805   const PetscScalar zero = 0.0;
1806 
1807   PetscFunctionBegin;
1808   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1809   if (applytranspose) {
1810     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1811     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1812   } else {
1813     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1814     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1815   }
1816   /* start communications from local primal nodes to rhs of coarse solver */
1817   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1818   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1819   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1820 
1821   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1822   /* TODO remove null space when doing multilevel */
1823   if (pcbddc->coarse_ksp) {
1824     Vec rhs,sol;
1825 
1826     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1827     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1828     if (applytranspose) {
1829       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1830     } else {
1831       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1832     }
1833   }
1834 
1835   /* Local solution on R nodes */
1836   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1837     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1838   }
1839 
1840   /* communications from coarse sol to local primal nodes */
1841   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1842   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1843 
1844   /* Sum contributions from two levels */
1845   if (applytranspose) {
1846     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1847     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1848   } else {
1849     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1850     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1851   }
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1857 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1858 {
1859   PetscErrorCode ierr;
1860   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1861   PetscScalar    *array;
1862   Vec            from,to;
1863 
1864   PetscFunctionBegin;
1865   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1866     from = pcbddc->coarse_vec;
1867     to = pcbddc->vec1_P;
1868     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1869       Vec tvec;
1870 
1871       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1872       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1873       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1874       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1875       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1876       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1877     }
1878   } else { /* from local to global -> put data in coarse right hand side */
1879     from = pcbddc->vec1_P;
1880     to = pcbddc->coarse_vec;
1881   }
1882   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1888 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1889 {
1890   PetscErrorCode ierr;
1891   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1892   PetscScalar    *array;
1893   Vec            from,to;
1894 
1895   PetscFunctionBegin;
1896   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1897     from = pcbddc->coarse_vec;
1898     to = pcbddc->vec1_P;
1899   } else { /* from local to global -> put data in coarse right hand side */
1900     from = pcbddc->vec1_P;
1901     to = pcbddc->coarse_vec;
1902   }
1903   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1904   if (smode == SCATTER_FORWARD) {
1905     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1906       Vec tvec;
1907 
1908       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1909       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1910       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1911       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1912     }
1913   } else {
1914     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1915      ierr = VecResetArray(from);CHKERRQ(ierr);
1916     }
1917   }
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 /* uncomment for testing purposes */
1922 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1923 #undef __FUNCT__
1924 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1925 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1926 {
1927   PetscErrorCode    ierr;
1928   PC_IS*            pcis = (PC_IS*)(pc->data);
1929   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1930   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1931   /* one and zero */
1932   PetscScalar       one=1.0,zero=0.0;
1933   /* space to store constraints and their local indices */
1934   PetscScalar       *constraints_data;
1935   PetscInt          *constraints_idxs,*constraints_idxs_B;
1936   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1937   PetscInt          *constraints_n;
1938   /* iterators */
1939   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1940   /* BLAS integers */
1941   PetscBLASInt      lwork,lierr;
1942   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1943   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1944   /* reuse */
1945   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1946   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1947   /* change of basis */
1948   PetscBool         qr_needed;
1949   PetscBT           change_basis,qr_needed_idx;
1950   /* auxiliary stuff */
1951   PetscInt          *nnz,*is_indices;
1952   PetscInt          ncc;
1953   /* some quantities */
1954   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1955   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1956 
1957   PetscFunctionBegin;
1958   /* Destroy Mat objects computed previously */
1959   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1960   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1961   /* save info on constraints from previous setup (if any) */
1962   olocal_primal_size = pcbddc->local_primal_size;
1963   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1964   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
1965   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1966   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1967   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1968   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1969 
1970   /* print some info */
1971   if (pcbddc->dbg_flag) {
1972     IS       vertices;
1973     PetscInt nv,nedges,nfaces;
1974     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1975     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1976     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1977     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1978     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1979     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1980     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1981     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1982     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1983   }
1984 
1985   if (!pcbddc->adaptive_selection) {
1986     IS           ISForVertices,*ISForFaces,*ISForEdges;
1987     MatNullSpace nearnullsp;
1988     const Vec    *nearnullvecs;
1989     Vec          *localnearnullsp;
1990     PetscScalar  *array;
1991     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1992     PetscBool    nnsp_has_cnst;
1993     /* LAPACK working arrays for SVD or POD */
1994     PetscBool    skip_lapack,boolforchange;
1995     PetscScalar  *work;
1996     PetscReal    *singular_vals;
1997 #if defined(PETSC_USE_COMPLEX)
1998     PetscReal    *rwork;
1999 #endif
2000 #if defined(PETSC_MISSING_LAPACK_GESVD)
2001     PetscScalar  *temp_basis,*correlation_mat;
2002 #else
2003     PetscBLASInt dummy_int=1;
2004     PetscScalar  dummy_scalar=1.;
2005 #endif
2006 
2007     /* Get index sets for faces, edges and vertices from graph */
2008     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2009     /* free unneeded index sets */
2010     if (!pcbddc->use_vertices) {
2011       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2012     }
2013     if (!pcbddc->use_edges) {
2014       for (i=0;i<n_ISForEdges;i++) {
2015         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2016       }
2017       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2018       n_ISForEdges = 0;
2019     }
2020     if (!pcbddc->use_faces) {
2021       for (i=0;i<n_ISForFaces;i++) {
2022         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2023       }
2024       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2025       n_ISForFaces = 0;
2026     }
2027 
2028 #if defined(PETSC_USE_DEBUG)
2029     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2030        Also use_change_of_basis should be consistent among processors */
2031     if (pcbddc->NullSpace) {
2032       PetscBool tbool[2],gbool[2];
2033 
2034       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2035         pcbddc->use_change_of_basis = PETSC_TRUE;
2036         if (!ISForEdges) {
2037           pcbddc->use_change_on_faces = PETSC_TRUE;
2038         }
2039       }
2040       tbool[0] = pcbddc->use_change_of_basis;
2041       tbool[1] = pcbddc->use_change_on_faces;
2042       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2043       pcbddc->use_change_of_basis = gbool[0];
2044       pcbddc->use_change_on_faces = gbool[1];
2045     }
2046 #endif
2047 
2048     /* check if near null space is attached to global mat */
2049     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2050     if (nearnullsp) {
2051       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2052       /* remove any stored info */
2053       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2054       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2055       /* store information for BDDC solver reuse */
2056       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2057       pcbddc->onearnullspace = nearnullsp;
2058       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2059       for (i=0;i<nnsp_size;i++) {
2060         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2061       }
2062     } else { /* if near null space is not provided BDDC uses constants by default */
2063       nnsp_size = 0;
2064       nnsp_has_cnst = PETSC_TRUE;
2065     }
2066     /* get max number of constraints on a single cc */
2067     max_constraints = nnsp_size;
2068     if (nnsp_has_cnst) max_constraints++;
2069 
2070     /*
2071          Evaluate maximum storage size needed by the procedure
2072          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2073          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2074          There can be multiple constraints per connected component
2075                                                                                                                                                            */
2076     n_vertices = 0;
2077     if (ISForVertices) {
2078       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2079     }
2080     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2081     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2082 
2083     total_counts = n_ISForFaces+n_ISForEdges;
2084     total_counts *= max_constraints;
2085     total_counts += n_vertices;
2086     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2087 
2088     total_counts = 0;
2089     max_size_of_constraint = 0;
2090     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2091       IS used_is;
2092       if (i<n_ISForEdges) {
2093         used_is = ISForEdges[i];
2094       } else {
2095         used_is = ISForFaces[i-n_ISForEdges];
2096       }
2097       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2098       total_counts += j;
2099       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2100     }
2101     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2102 
2103     /* get local part of global near null space vectors */
2104     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2105     for (k=0;k<nnsp_size;k++) {
2106       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2107       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2108       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2109     }
2110 
2111     /* whether or not to skip lapack calls */
2112     skip_lapack = PETSC_TRUE;
2113     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2114 
2115     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2116     if (!skip_lapack) {
2117       PetscScalar temp_work;
2118 
2119 #if defined(PETSC_MISSING_LAPACK_GESVD)
2120       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2121       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2122       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2123       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2124 #if defined(PETSC_USE_COMPLEX)
2125       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2126 #endif
2127       /* now we evaluate the optimal workspace using query with lwork=-1 */
2128       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2129       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2130       lwork = -1;
2131       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2132 #if !defined(PETSC_USE_COMPLEX)
2133       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2134 #else
2135       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2136 #endif
2137       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2138       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2139 #else /* on missing GESVD */
2140       /* SVD */
2141       PetscInt max_n,min_n;
2142       max_n = max_size_of_constraint;
2143       min_n = max_constraints;
2144       if (max_size_of_constraint < max_constraints) {
2145         min_n = max_size_of_constraint;
2146         max_n = max_constraints;
2147       }
2148       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2149 #if defined(PETSC_USE_COMPLEX)
2150       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2151 #endif
2152       /* now we evaluate the optimal workspace using query with lwork=-1 */
2153       lwork = -1;
2154       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2155       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2156       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2157       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2158 #if !defined(PETSC_USE_COMPLEX)
2159       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2160 #else
2161       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2162 #endif
2163       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2164       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2165 #endif /* on missing GESVD */
2166       /* Allocate optimal workspace */
2167       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2168       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2169     }
2170     /* Now we can loop on constraining sets */
2171     total_counts = 0;
2172     constraints_idxs_ptr[0] = 0;
2173     constraints_data_ptr[0] = 0;
2174     /* vertices */
2175     if (n_vertices) {
2176       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2177       if (nnsp_has_cnst) { /* it considers all possible vertices */
2178         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2179         for (i=0;i<n_vertices;i++) {
2180           constraints_n[total_counts] = 1;
2181           constraints_data[total_counts] = 1.0;
2182           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2183           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2184           total_counts++;
2185         }
2186       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2187         PetscBool used_vertex;
2188         for (i=0;i<n_vertices;i++) {
2189           used_vertex = PETSC_FALSE;
2190           k = 0;
2191           while (!used_vertex && k<nnsp_size) {
2192             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2193             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2194               constraints_n[total_counts] = 1;
2195               constraints_idxs[total_counts] = is_indices[i];
2196               constraints_data[total_counts] = 1.0;
2197               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2198               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2199               total_counts++;
2200               used_vertex = PETSC_TRUE;
2201             }
2202             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2203             k++;
2204           }
2205         }
2206       }
2207       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2208       n_vertices = total_counts;
2209     }
2210 
2211     /* edges and faces */
2212     total_counts_cc = total_counts;
2213     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2214       IS        used_is;
2215       PetscBool idxs_copied = PETSC_FALSE;
2216 
2217       if (ncc<n_ISForEdges) {
2218         used_is = ISForEdges[ncc];
2219         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2220       } else {
2221         used_is = ISForFaces[ncc-n_ISForEdges];
2222         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2223       }
2224       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2225 
2226       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2227       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2228       /* change of basis should not be performed on local periodic nodes */
2229       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2230       if (nnsp_has_cnst) {
2231         PetscScalar quad_value;
2232 
2233         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2234         idxs_copied = PETSC_TRUE;
2235 
2236         if (!pcbddc->use_nnsp_true) {
2237           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2238         } else {
2239           quad_value = 1.0;
2240         }
2241         for (j=0;j<size_of_constraint;j++) {
2242           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2243         }
2244         temp_constraints++;
2245         total_counts++;
2246       }
2247       for (k=0;k<nnsp_size;k++) {
2248         PetscReal real_value;
2249         PetscScalar *ptr_to_data;
2250 
2251         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2252         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2253         for (j=0;j<size_of_constraint;j++) {
2254           ptr_to_data[j] = array[is_indices[j]];
2255         }
2256         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2257         /* check if array is null on the connected component */
2258         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2259         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2260         if (real_value > 0.0) { /* keep indices and values */
2261           temp_constraints++;
2262           total_counts++;
2263           if (!idxs_copied) {
2264             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2265             idxs_copied = PETSC_TRUE;
2266           }
2267         }
2268       }
2269       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2270       valid_constraints = temp_constraints;
2271       if (!pcbddc->use_nnsp_true && temp_constraints) {
2272         if (temp_constraints == 1) { /* just normalize the constraint */
2273           PetscScalar norm,*ptr_to_data;
2274 
2275           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2276           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2277           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2278           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2279           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2280         } else { /* perform SVD */
2281           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2282           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2283 
2284 #if defined(PETSC_MISSING_LAPACK_GESVD)
2285           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2286              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2287              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2288                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2289                 from that computed using LAPACKgesvd
2290              -> This is due to a different computation of eigenvectors in LAPACKheev
2291              -> The quality of the POD-computed basis will be the same */
2292           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2293           /* Store upper triangular part of correlation matrix */
2294           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2295           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2296           for (j=0;j<temp_constraints;j++) {
2297             for (k=0;k<j+1;k++) {
2298               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2299             }
2300           }
2301           /* compute eigenvalues and eigenvectors of correlation matrix */
2302           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2303           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2304 #if !defined(PETSC_USE_COMPLEX)
2305           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2306 #else
2307           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2308 #endif
2309           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2310           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2311           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2312           j = 0;
2313           while (j < temp_constraints && singular_vals[j] < tol) j++;
2314           total_counts = total_counts-j;
2315           valid_constraints = temp_constraints-j;
2316           /* scale and copy POD basis into used quadrature memory */
2317           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2318           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2319           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2320           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2321           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2322           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2323           if (j<temp_constraints) {
2324             PetscInt ii;
2325             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2326             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2327             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2328             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2329             for (k=0;k<temp_constraints-j;k++) {
2330               for (ii=0;ii<size_of_constraint;ii++) {
2331                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2332               }
2333             }
2334           }
2335 #else  /* on missing GESVD */
2336           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2337           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2338           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2339           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2340 #if !defined(PETSC_USE_COMPLEX)
2341           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2342 #else
2343           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2344 #endif
2345           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2346           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2347           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2348           k = temp_constraints;
2349           if (k > size_of_constraint) k = size_of_constraint;
2350           j = 0;
2351           while (j < k && singular_vals[k-j-1] < tol) j++;
2352           valid_constraints = k-j;
2353           total_counts = total_counts-temp_constraints+valid_constraints;
2354 #endif /* on missing GESVD */
2355         }
2356       }
2357       /* update pointers information */
2358       if (valid_constraints) {
2359         constraints_n[total_counts_cc] = valid_constraints;
2360         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2361         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2362         /* set change_of_basis flag */
2363         if (boolforchange) {
2364           PetscBTSet(change_basis,total_counts_cc);
2365         }
2366         total_counts_cc++;
2367       }
2368     }
2369     /* free workspace */
2370     if (!skip_lapack) {
2371       ierr = PetscFree(work);CHKERRQ(ierr);
2372 #if defined(PETSC_USE_COMPLEX)
2373       ierr = PetscFree(rwork);CHKERRQ(ierr);
2374 #endif
2375       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2376 #if defined(PETSC_MISSING_LAPACK_GESVD)
2377       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2378       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2379 #endif
2380     }
2381     for (k=0;k<nnsp_size;k++) {
2382       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2383     }
2384     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2385     /* free index sets of faces, edges and vertices */
2386     for (i=0;i<n_ISForFaces;i++) {
2387       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2388     }
2389     if (n_ISForFaces) {
2390       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2391     }
2392     for (i=0;i<n_ISForEdges;i++) {
2393       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2394     }
2395     if (n_ISForEdges) {
2396       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2397     }
2398     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2399   } else {
2400     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2401 
2402     total_counts = 0;
2403     n_vertices = 0;
2404     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2405       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2406     }
2407     max_constraints = 0;
2408     total_counts_cc = 0;
2409     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2410       total_counts += pcbddc->adaptive_constraints_n[i];
2411       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2412       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2413     }
2414     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2415     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2416     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2417     constraints_data = pcbddc->adaptive_constraints_data;
2418     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2419     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2420     total_counts_cc = 0;
2421     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2422       if (pcbddc->adaptive_constraints_n[i]) {
2423         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2424       }
2425     }
2426 #if 0
2427     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2428     for (i=0;i<total_counts_cc;i++) {
2429       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2430       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2431       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2432         printf(" %d",constraints_idxs[j]);
2433       }
2434       printf("\n");
2435       printf("number of cc: %d\n",constraints_n[i]);
2436     }
2437     for (i=0;i<n_vertices;i++) {
2438       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2439     }
2440     for (i=0;i<sub_schurs->n_subs;i++) {
2441       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2442     }
2443 #endif
2444 
2445     max_size_of_constraint = 0;
2446     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2447     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2448     /* Change of basis */
2449     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2450     if (pcbddc->use_change_of_basis) {
2451       for (i=0;i<sub_schurs->n_subs;i++) {
2452         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2453           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2454         }
2455       }
2456     }
2457   }
2458   pcbddc->local_primal_size = total_counts;
2459   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2460 
2461   /* map constraints_idxs in boundary numbering */
2462   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2463   if (i != constraints_idxs_ptr[total_counts_cc]) {
2464     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2465   }
2466 
2467   /* Create constraint matrix */
2468   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2469   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2470   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2471 
2472   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2473   /* determine if a QR strategy is needed for change of basis */
2474   qr_needed = PETSC_FALSE;
2475   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2476   total_primal_vertices=0;
2477   pcbddc->local_primal_size_cc = 0;
2478   for (i=0;i<total_counts_cc;i++) {
2479     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2480     if (size_of_constraint == 1) {
2481       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2482       pcbddc->local_primal_size_cc += 1;
2483     } else if (PetscBTLookup(change_basis,i)) {
2484       for (k=0;k<constraints_n[i];k++) {
2485         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2486       }
2487       pcbddc->local_primal_size_cc += constraints_n[i];
2488       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2489         PetscBTSet(qr_needed_idx,i);
2490         qr_needed = PETSC_TRUE;
2491       }
2492     } else {
2493       pcbddc->local_primal_size_cc += 1;
2494     }
2495   }
2496   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2497   pcbddc->n_vertices = total_primal_vertices;
2498   /* permute indices in order to have a sorted set of vertices */
2499   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2500 
2501   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2502   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2503   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2504 
2505   /* nonzero structure of constraint matrix */
2506   /* and get reference dof for local constraints */
2507   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2508   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2509 
2510   j = total_primal_vertices;
2511   total_counts = total_primal_vertices;
2512   cum = total_primal_vertices;
2513   for (i=n_vertices;i<total_counts_cc;i++) {
2514     if (!PetscBTLookup(change_basis,i)) {
2515       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2516       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2517       cum++;
2518       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2519       for (k=0;k<constraints_n[i];k++) {
2520         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2521         nnz[j+k] = size_of_constraint;
2522       }
2523       j += constraints_n[i];
2524     }
2525   }
2526   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2527   ierr = PetscFree(nnz);CHKERRQ(ierr);
2528 
2529   /* set values in constraint matrix */
2530   for (i=0;i<total_primal_vertices;i++) {
2531     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2532   }
2533   total_counts = total_primal_vertices;
2534   for (i=n_vertices;i<total_counts_cc;i++) {
2535     if (!PetscBTLookup(change_basis,i)) {
2536       PetscInt *cols;
2537 
2538       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2539       cols = constraints_idxs+constraints_idxs_ptr[i];
2540       for (k=0;k<constraints_n[i];k++) {
2541         PetscInt    row = total_counts+k;
2542         PetscScalar *vals;
2543 
2544         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2545         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2546       }
2547       total_counts += constraints_n[i];
2548     }
2549   }
2550   /* assembling */
2551   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2552   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2553 
2554   /*
2555   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2556   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2557   */
2558   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2559   if (pcbddc->use_change_of_basis) {
2560     /* dual and primal dofs on a single cc */
2561     PetscInt     dual_dofs,primal_dofs;
2562     /* working stuff for GEQRF */
2563     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2564     PetscBLASInt lqr_work;
2565     /* working stuff for UNGQR */
2566     PetscScalar  *gqr_work,lgqr_work_t;
2567     PetscBLASInt lgqr_work;
2568     /* working stuff for TRTRS */
2569     PetscScalar  *trs_rhs;
2570     PetscBLASInt Blas_NRHS;
2571     /* pointers for values insertion into change of basis matrix */
2572     PetscInt     *start_rows,*start_cols;
2573     PetscScalar  *start_vals;
2574     /* working stuff for values insertion */
2575     PetscBT      is_primal;
2576     PetscInt     *aux_primal_numbering_B;
2577     /* matrix sizes */
2578     PetscInt     global_size,local_size;
2579     /* temporary change of basis */
2580     Mat          localChangeOfBasisMatrix;
2581     /* extra space for debugging */
2582     PetscScalar  *dbg_work;
2583 
2584     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2585     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2586     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2587     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2588     /* nonzeros for local mat */
2589     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2590     for (i=0;i<pcis->n;i++) nnz[i]=1;
2591     for (i=n_vertices;i<total_counts_cc;i++) {
2592       if (PetscBTLookup(change_basis,i)) {
2593         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2594         if (PetscBTLookup(qr_needed_idx,i)) {
2595           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2596         } else {
2597           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2598           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2599         }
2600       }
2601     }
2602     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2603     ierr = PetscFree(nnz);CHKERRQ(ierr);
2604     /* Set initial identity in the matrix */
2605     for (i=0;i<pcis->n;i++) {
2606       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2607     }
2608 
2609     if (pcbddc->dbg_flag) {
2610       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2611       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2612     }
2613 
2614 
2615     /* Now we loop on the constraints which need a change of basis */
2616     /*
2617        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2618        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2619 
2620        Basic blocks of change of basis matrix T computed by
2621 
2622           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2623 
2624             | 1        0   ...        0         s_1/S |
2625             | 0        1   ...        0         s_2/S |
2626             |              ...                        |
2627             | 0        ...            1     s_{n-1}/S |
2628             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2629 
2630             with S = \sum_{i=1}^n s_i^2
2631             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2632                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2633 
2634           - QR decomposition of constraints otherwise
2635     */
2636     if (qr_needed) {
2637       /* space to store Q */
2638       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2639       /* first we issue queries for optimal work */
2640       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2641       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2642       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2643       lqr_work = -1;
2644       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2645       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2646       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2647       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2648       lgqr_work = -1;
2649       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2650       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2651       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2652       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2653       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2654       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2655       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2656       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2657       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2658       /* array to store scaling factors for reflectors */
2659       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2660       /* array to store rhs and solution of triangular solver */
2661       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2662       /* allocating workspace for check */
2663       if (pcbddc->dbg_flag) {
2664         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2665       }
2666     }
2667     /* array to store whether a node is primal or not */
2668     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2669     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2670     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2671     if (i != total_primal_vertices) {
2672       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2673     }
2674     for (i=0;i<total_primal_vertices;i++) {
2675       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2676     }
2677     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2678 
2679     /* loop on constraints and see whether or not they need a change of basis and compute it */
2680     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2681       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2682       if (PetscBTLookup(change_basis,total_counts)) {
2683         /* get constraint info */
2684         primal_dofs = constraints_n[total_counts];
2685         dual_dofs = size_of_constraint-primal_dofs;
2686 
2687         if (pcbddc->dbg_flag) {
2688           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2689         }
2690 
2691         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2692 
2693           /* copy quadrature constraints for change of basis check */
2694           if (pcbddc->dbg_flag) {
2695             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2696           }
2697           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2698           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2699 
2700           /* compute QR decomposition of constraints */
2701           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2702           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2703           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2704           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2705           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2706           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2707           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2708 
2709           /* explictly compute R^-T */
2710           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2711           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2712           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2713           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2714           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2715           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2716           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2717           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2718           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2719           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2720 
2721           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2722           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2723           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2724           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2725           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2726           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2727           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2728           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2729           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2730 
2731           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2732              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2733              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2734           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2735           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2736           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2737           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2738           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2739           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2740           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2741           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2742           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2743           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2744 
2745           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2746           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2747           /* insert cols for primal dofs */
2748           for (j=0;j<primal_dofs;j++) {
2749             start_vals = &qr_basis[j*size_of_constraint];
2750             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2751             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2752           }
2753           /* insert cols for dual dofs */
2754           for (j=0,k=0;j<dual_dofs;k++) {
2755             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2756               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2757               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2758               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2759               j++;
2760             }
2761           }
2762 
2763           /* check change of basis */
2764           if (pcbddc->dbg_flag) {
2765             PetscInt   ii,jj;
2766             PetscBool valid_qr=PETSC_TRUE;
2767             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2768             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2769             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2770             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2771             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2772             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2773             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2774             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2775             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2776             for (jj=0;jj<size_of_constraint;jj++) {
2777               for (ii=0;ii<primal_dofs;ii++) {
2778                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2779                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2780               }
2781             }
2782             if (!valid_qr) {
2783               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2784               for (jj=0;jj<size_of_constraint;jj++) {
2785                 for (ii=0;ii<primal_dofs;ii++) {
2786                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2787                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2788                   }
2789                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2790                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2791                   }
2792                 }
2793               }
2794             } else {
2795               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2796             }
2797           }
2798         } else { /* simple transformation block */
2799           PetscInt    row,col;
2800           PetscScalar val,norm;
2801 
2802           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2803           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2804           for (j=0;j<size_of_constraint;j++) {
2805             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2806             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2807             if (!PetscBTLookup(is_primal,row_B)) {
2808               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2809               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2810               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2811             } else {
2812               for (k=0;k<size_of_constraint;k++) {
2813                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2814                 if (row != col) {
2815                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2816                 } else {
2817                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2818                 }
2819                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2820               }
2821             }
2822           }
2823           if (pcbddc->dbg_flag) {
2824             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2825           }
2826         }
2827       } else {
2828         if (pcbddc->dbg_flag) {
2829           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2830         }
2831       }
2832     }
2833 
2834     /* free workspace */
2835     if (qr_needed) {
2836       if (pcbddc->dbg_flag) {
2837         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2838       }
2839       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2840       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2841       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2842       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2843       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2844     }
2845     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2846     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2847     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2848 
2849     /* assembling of global change of variable */
2850     {
2851       Mat      tmat;
2852       PetscInt bs;
2853 
2854       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2855       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2856       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2857       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2858       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2859       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2860       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2861       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2862       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2863       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2864       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2865       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2866       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2867       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2868       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2869       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2870       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2871       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2872     }
2873     /* check */
2874     if (pcbddc->dbg_flag) {
2875       PetscReal error;
2876       Vec       x,x_change;
2877 
2878       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2879       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2880       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2881       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2882       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2883       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2884       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2885       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2886       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2887       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2888       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2889       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2890       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2891       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2892       ierr = VecDestroy(&x);CHKERRQ(ierr);
2893       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2894     }
2895 
2896     /* adapt sub_schurs computed (if any) */
2897     if (pcbddc->use_deluxe_scaling) {
2898       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2899       if (sub_schurs->S_Ej_all) {
2900         Mat S_new,tmat;
2901         IS is_all_N;
2902 
2903         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2904         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2905         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2906         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2907         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2908         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2909         sub_schurs->S_Ej_all = S_new;
2910         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2911         if (sub_schurs->sum_S_Ej_all) {
2912           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2913           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2914           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2915           sub_schurs->sum_S_Ej_all = S_new;
2916           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2917         }
2918         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2919       }
2920     }
2921     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2922   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2923     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2924     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2925   }
2926 
2927   /* set up change of basis context */
2928   if (pcbddc->ChangeOfBasisMatrix) {
2929     PCBDDCChange_ctx change_ctx;
2930 
2931     if (!pcbddc->new_global_mat) {
2932       PetscInt global_size,local_size;
2933 
2934       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2935       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2936       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2937       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2938       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2939       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2940       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2941       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2942       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2943     } else {
2944       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2945       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2946       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2947     }
2948     if (!pcbddc->user_ChangeOfBasisMatrix) {
2949       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2950       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2951     } else {
2952       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2953       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2954     }
2955     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2956     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2957     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2958     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2959   }
2960 
2961   /* check if a new primal space has been introduced */
2962   pcbddc->new_primal_space_local = PETSC_TRUE;
2963   if (olocal_primal_size == pcbddc->local_primal_size) {
2964     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2965     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2966     if (!pcbddc->new_primal_space_local) {
2967       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2968       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2969     }
2970   }
2971   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
2972   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2973   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2974 
2975   /* flush dbg viewer */
2976   if (pcbddc->dbg_flag) {
2977     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2978   }
2979 
2980   /* free workspace */
2981   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2982   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2983   if (!pcbddc->adaptive_selection) {
2984     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2985     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2986   } else {
2987     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2988                       pcbddc->adaptive_constraints_idxs_ptr,
2989                       pcbddc->adaptive_constraints_data_ptr,
2990                       pcbddc->adaptive_constraints_idxs,
2991                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2992     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
2993     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
2994   }
2995   PetscFunctionReturn(0);
2996 }
2997 
2998 #undef __FUNCT__
2999 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3000 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3001 {
3002   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3003   PC_IS       *pcis = (PC_IS*)pc->data;
3004   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3005   PetscInt    ierr,i,vertex_size,N;
3006   PetscViewer viewer=pcbddc->dbg_viewer;
3007 
3008   PetscFunctionBegin;
3009   /* Reset previously computed graph */
3010   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3011   /* Init local Graph struct */
3012   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3013   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
3014 
3015   /* Check validity of the csr graph passed in by the user */
3016   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3017     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3018   }
3019 
3020   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3021   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3022     PetscInt  *xadj,*adjncy;
3023     PetscInt  nvtxs;
3024     PetscBool flg_row=PETSC_FALSE;
3025 
3026     if (pcbddc->use_local_adj) {
3027 
3028       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3029       if (flg_row) {
3030         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3031         pcbddc->computed_rowadj = PETSC_TRUE;
3032       }
3033       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3034     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3035       IS                     is_dummy;
3036       ISLocalToGlobalMapping l2gmap_dummy;
3037       PetscInt               j,sum;
3038       PetscInt               *cxadj,*cadjncy;
3039       const PetscInt         *idxs;
3040       PCBDDCGraph            graph;
3041       PetscBT                is_on_boundary;
3042 
3043       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3044       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3045       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3046       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3047       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3048       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3049       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3050       if (flg_row) {
3051         graph->xadj = xadj;
3052         graph->adjncy = adjncy;
3053       }
3054       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3055       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3056       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3057 
3058       if (pcbddc->dbg_flag) {
3059         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3060         for (i=0;i<graph->ncc;i++) {
3061           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3062         }
3063       }
3064 
3065       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3066       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3067       for (i=0;i<pcis->n_B;i++) {
3068         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3069       }
3070       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3071 
3072       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3073       sum = 0;
3074       for (i=0;i<graph->ncc;i++) {
3075         PetscInt sizecc = 0;
3076         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3077           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3078             sizecc++;
3079           }
3080         }
3081         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3082           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3083             cxadj[graph->queue[j]] = sizecc;
3084           }
3085         }
3086         sum += sizecc*sizecc;
3087       }
3088       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3089       sum = 0;
3090       for (i=0;i<pcis->n;i++) {
3091         PetscInt temp = cxadj[i];
3092         cxadj[i] = sum;
3093         sum += temp;
3094       }
3095       cxadj[pcis->n] = sum;
3096       for (i=0;i<graph->ncc;i++) {
3097         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3098           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3099             PetscInt k,sizecc = 0;
3100             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3101               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3102                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3103                 sizecc++;
3104               }
3105             }
3106           }
3107         }
3108       }
3109       if (sum) {
3110         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3111       } else {
3112         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3113         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3114       }
3115       graph->xadj = 0;
3116       graph->adjncy = 0;
3117       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3118       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3119     }
3120   }
3121   if (pcbddc->dbg_flag) {
3122     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3123   }
3124 
3125   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3126   vertex_size = 1;
3127   if (pcbddc->user_provided_isfordofs) {
3128     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3129       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3130       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3131         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3132         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3133       }
3134       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3135       pcbddc->n_ISForDofs = 0;
3136       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3137     }
3138     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3139     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3140   } else {
3141     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3142       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3143       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3144       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3145         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3146       }
3147     }
3148   }
3149 
3150   /* Setup of Graph */
3151   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3152     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3153   }
3154   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3155     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3156   }
3157   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3158 
3159   /* Graph's connected components analysis */
3160   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3161 
3162   /* print some info to stdout */
3163   if (pcbddc->dbg_flag) {
3164     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3165   }
3166 
3167   /* mark topography has done */
3168   pcbddc->recompute_topography = PETSC_FALSE;
3169   PetscFunctionReturn(0);
3170 }
3171 
3172 #undef __FUNCT__
3173 #define __FUNCT__ "PCBDDCSubsetNumbering"
3174 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3175 {
3176   Vec            local_vec,global_vec;
3177   IS             seqis,paris;
3178   VecScatter     scatter_ctx;
3179   PetscScalar    *array;
3180   PetscInt       *temp_global_dofs;
3181   PetscScalar    globalsum;
3182   PetscInt       i,j,s;
3183   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3184   PetscMPIInt    rank_prec_comm,size_prec_comm;
3185   PetscInt       *dof_sizes,*dof_displs;
3186   PetscBool      first_found;
3187   PetscErrorCode ierr;
3188 
3189   PetscFunctionBegin;
3190   /* mpi buffers */
3191   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3192   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3193   j = ( !rank_prec_comm ? size_prec_comm : 0);
3194   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3195   /* get maximum size of subset */
3196   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3197   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3198   max_local = 0;
3199   for (i=0;i<n_local_dofs;i++) {
3200     if (max_local < temp_global_dofs[i] ) {
3201       max_local = temp_global_dofs[i];
3202     }
3203   }
3204   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3205   max_global++;
3206   max_local = 0;
3207   for (i=0;i<n_local_dofs;i++) {
3208     if (max_local < local_dofs[i] ) {
3209       max_local = local_dofs[i];
3210     }
3211   }
3212   max_local++;
3213   /* allocate workspace */
3214   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3215   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3216   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3217   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3218   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3219   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3220   /* create scatter */
3221   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3222   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3223   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3224   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3225   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3226   /* init array */
3227   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3228   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3229   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3230   if (local_dofs_mult) {
3231     for (i=0;i<n_local_dofs;i++) {
3232       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3233     }
3234   } else {
3235     for (i=0;i<n_local_dofs;i++) {
3236       array[local_dofs[i]]=1.0;
3237     }
3238   }
3239   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3240   /* scatter into global vec and get total number of global dofs */
3241   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3242   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3243   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3244   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3245   /* Fill global_vec with cumulative function for global numbering */
3246   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3247   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3248   nlocals = 0;
3249   first_index = -1;
3250   first_found = PETSC_FALSE;
3251   for (i=0;i<s;i++) {
3252     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3253       first_found = PETSC_TRUE;
3254       first_index = i;
3255     }
3256     nlocals += (PetscInt)PetscRealPart(array[i]);
3257   }
3258   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3259   if (!rank_prec_comm) {
3260     dof_displs[0]=0;
3261     for (i=1;i<size_prec_comm;i++) {
3262       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3263     }
3264   }
3265   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3266   if (first_found) {
3267     array[first_index] += (PetscScalar)nlocals;
3268     old_index = first_index;
3269     for (i=first_index+1;i<s;i++) {
3270       if (PetscRealPart(array[i]) > 0.1) {
3271         array[i] += array[old_index];
3272         old_index = i;
3273       }
3274     }
3275   }
3276   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3277   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3278   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3279   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3280   /* get global ordering of local dofs */
3281   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3282   if (local_dofs_mult) {
3283     for (i=0;i<n_local_dofs;i++) {
3284       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3285     }
3286   } else {
3287     for (i=0;i<n_local_dofs;i++) {
3288       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3289     }
3290   }
3291   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3292   /* free workspace */
3293   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3294   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3295   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3296   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3297   /* return pointer to global ordering of local dofs */
3298   *global_numbering_subset = temp_global_dofs;
3299   PetscFunctionReturn(0);
3300 }
3301 
3302 #undef __FUNCT__
3303 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3304 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3305 {
3306   PetscInt       i,j;
3307   PetscScalar    *alphas;
3308   PetscErrorCode ierr;
3309 
3310   PetscFunctionBegin;
3311   /* this implements stabilized Gram-Schmidt */
3312   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3313   for (i=0;i<n;i++) {
3314     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3315     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3316     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3317   }
3318   ierr = PetscFree(alphas);CHKERRQ(ierr);
3319   PetscFunctionReturn(0);
3320 }
3321 
3322 #undef __FUNCT__
3323 #define __FUNCT__ "MatISGetSubassemblingPattern"
3324 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3325 {
3326   Mat             subdomain_adj;
3327   IS              new_ranks,ranks_send_to;
3328   MatPartitioning partitioner;
3329   Mat_IS          *matis;
3330   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3331   PetscInt        prank;
3332   PetscMPIInt     size,rank,color;
3333   PetscInt        *xadj,*adjncy,*oldranks;
3334   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3335   PetscInt        i,local_size,threshold=0;
3336   PetscErrorCode  ierr;
3337   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3338   PetscSubcomm    subcomm;
3339 
3340   PetscFunctionBegin;
3341   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3342   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3343   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3344 
3345   /* Get info on mapping */
3346   matis = (Mat_IS*)(mat->data);
3347   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3348   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3349 
3350   /* build local CSR graph of subdomains' connectivity */
3351   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3352   xadj[0] = 0;
3353   xadj[1] = PetscMax(n_neighs-1,0);
3354   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3355   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3356 
3357   if (threshold) {
3358     PetscInt xadj_count = 0;
3359     for (i=1;i<n_neighs;i++) {
3360       if (n_shared[i] > threshold) {
3361         adjncy[xadj_count] = neighs[i];
3362         adjncy_wgt[xadj_count] = n_shared[i];
3363         xadj_count++;
3364       }
3365     }
3366     xadj[1] = xadj_count;
3367   } else {
3368     if (xadj[1]) {
3369       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3370       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3371     }
3372   }
3373   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3374   if (use_square) {
3375     for (i=0;i<xadj[1];i++) {
3376       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3377     }
3378   }
3379   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3380 
3381   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3382 
3383   /*
3384     Restrict work on active processes only.
3385   */
3386   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3387   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3388   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3389   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3390   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3391   if (color) {
3392     ierr = PetscFree(xadj);CHKERRQ(ierr);
3393     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3394     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3395   } else {
3396     PetscInt coarsening_ratio;
3397     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3398     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3399     prank = rank;
3400     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3401     /*
3402     for (i=0;i<size;i++) {
3403       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3404     }
3405     */
3406     for (i=0;i<xadj[1];i++) {
3407       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3408     }
3409     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3410     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3411     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3412 
3413     /* Partition */
3414     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3415     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3416     if (use_vwgt) {
3417       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3418       v_wgt[0] = local_size;
3419       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3420     }
3421     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3422     coarsening_ratio = size/n_subdomains;
3423     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3424     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3425     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3426     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3427 
3428     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3429     if (contiguous) {
3430       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3431     } else {
3432       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3433     }
3434     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3435     /* clean up */
3436     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3437     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3438     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3439     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3440   }
3441   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3442 
3443   /* assemble parallel IS for sends */
3444   i = 1;
3445   if (color) i=0;
3446   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3447 
3448   /* get back IS */
3449   *is_sends = ranks_send_to;
3450   PetscFunctionReturn(0);
3451 }
3452 
3453 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3454 
3455 #undef __FUNCT__
3456 #define __FUNCT__ "MatISSubassemble"
3457 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3458 {
3459   Mat                    local_mat;
3460   Mat_IS                 *matis;
3461   IS                     is_sends_internal;
3462   PetscInt               rows,cols,new_local_rows;
3463   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3464   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3465   ISLocalToGlobalMapping l2gmap;
3466   PetscInt*              l2gmap_indices;
3467   const PetscInt*        is_indices;
3468   MatType                new_local_type;
3469   /* buffers */
3470   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3471   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3472   PetscInt               *recv_buffer_idxs_local;
3473   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3474   /* MPI */
3475   MPI_Comm               comm,comm_n;
3476   PetscSubcomm           subcomm;
3477   PetscMPIInt            n_sends,n_recvs,commsize;
3478   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3479   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3480   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3481   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3482   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3483   PetscErrorCode         ierr;
3484 
3485   PetscFunctionBegin;
3486   /* TODO: add missing checks */
3487   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3488   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3489   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3490   PetscValidLogicalCollectiveInt(mat,nis,7);
3491   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3492   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3493   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3494   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3495   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3496   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3497   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3498   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3499     PetscInt mrows,mcols,mnrows,mncols;
3500     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3501     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3502     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3503     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3504     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3505     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3506   }
3507   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3508   PetscValidLogicalCollectiveInt(mat,bs,0);
3509   /* prepare IS for sending if not provided */
3510   if (!is_sends) {
3511     PetscBool pcontig = PETSC_TRUE;
3512     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3513     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3514   } else {
3515     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3516     is_sends_internal = is_sends;
3517   }
3518 
3519   /* get pointer of MATIS data */
3520   matis = (Mat_IS*)mat->data;
3521 
3522   /* get comm */
3523   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3524 
3525   /* compute number of sends */
3526   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3527   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3528 
3529   /* compute number of receives */
3530   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3531   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3532   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3533   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3534   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3535   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3536   ierr = PetscFree(iflags);CHKERRQ(ierr);
3537 
3538   /* restrict comm if requested */
3539   subcomm = 0;
3540   destroy_mat = PETSC_FALSE;
3541   if (restrict_comm) {
3542     PetscMPIInt color,subcommsize;
3543 
3544     color = 0;
3545     if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */
3546     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3547     subcommsize = commsize - subcommsize;
3548     /* check if reuse has been requested */
3549     if (reuse == MAT_REUSE_MATRIX) {
3550       if (*mat_n) {
3551         PetscMPIInt subcommsize2;
3552         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3553         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3554         comm_n = PetscObjectComm((PetscObject)*mat_n);
3555       } else {
3556         comm_n = PETSC_COMM_SELF;
3557       }
3558     } else { /* MAT_INITIAL_MATRIX */
3559       PetscMPIInt rank;
3560 
3561       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3562       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3563       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3564       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3565       comm_n = PetscSubcommChild(subcomm);
3566     }
3567     /* flag to destroy *mat_n if not significative */
3568     if (color) destroy_mat = PETSC_TRUE;
3569   } else {
3570     comm_n = comm;
3571   }
3572 
3573   /* prepare send/receive buffers */
3574   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3575   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3576   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3577   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3578   if (nis) {
3579     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3580   }
3581 
3582   /* Get data from local matrices */
3583   if (!isdense) {
3584     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3585     /* TODO: See below some guidelines on how to prepare the local buffers */
3586     /*
3587        send_buffer_vals should contain the raw values of the local matrix
3588        send_buffer_idxs should contain:
3589        - MatType_PRIVATE type
3590        - PetscInt        size_of_l2gmap
3591        - PetscInt        global_row_indices[size_of_l2gmap]
3592        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3593     */
3594   } else {
3595     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3596     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3597     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3598     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3599     send_buffer_idxs[1] = i;
3600     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3601     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3602     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3603     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3604     for (i=0;i<n_sends;i++) {
3605       ilengths_vals[is_indices[i]] = len*len;
3606       ilengths_idxs[is_indices[i]] = len+2;
3607     }
3608   }
3609   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3610   /* additional is (if any) */
3611   if (nis) {
3612     PetscMPIInt psum;
3613     PetscInt j;
3614     for (j=0,psum=0;j<nis;j++) {
3615       PetscInt plen;
3616       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3617       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3618       psum += len+1; /* indices + lenght */
3619     }
3620     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3621     for (j=0,psum=0;j<nis;j++) {
3622       PetscInt plen;
3623       const PetscInt *is_array_idxs;
3624       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3625       send_buffer_idxs_is[psum] = plen;
3626       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3627       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3628       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3629       psum += plen+1; /* indices + lenght */
3630     }
3631     for (i=0;i<n_sends;i++) {
3632       ilengths_idxs_is[is_indices[i]] = psum;
3633     }
3634     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3635   }
3636 
3637   buf_size_idxs = 0;
3638   buf_size_vals = 0;
3639   buf_size_idxs_is = 0;
3640   for (i=0;i<n_recvs;i++) {
3641     buf_size_idxs += (PetscInt)olengths_idxs[i];
3642     buf_size_vals += (PetscInt)olengths_vals[i];
3643     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3644   }
3645   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3646   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3647   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3648 
3649   /* get new tags for clean communications */
3650   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3651   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3652   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3653 
3654   /* allocate for requests */
3655   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3656   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3657   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3658   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3659   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3660   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3661 
3662   /* communications */
3663   ptr_idxs = recv_buffer_idxs;
3664   ptr_vals = recv_buffer_vals;
3665   ptr_idxs_is = recv_buffer_idxs_is;
3666   for (i=0;i<n_recvs;i++) {
3667     source_dest = onodes[i];
3668     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3669     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3670     ptr_idxs += olengths_idxs[i];
3671     ptr_vals += olengths_vals[i];
3672     if (nis) {
3673       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3674       ptr_idxs_is += olengths_idxs_is[i];
3675     }
3676   }
3677   for (i=0;i<n_sends;i++) {
3678     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3679     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3680     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3681     if (nis) {
3682       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3683     }
3684   }
3685   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3686   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3687 
3688   /* assemble new l2g map */
3689   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3690   ptr_idxs = recv_buffer_idxs;
3691   new_local_rows = 0;
3692   for (i=0;i<n_recvs;i++) {
3693     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3694     ptr_idxs += olengths_idxs[i];
3695   }
3696   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3697   ptr_idxs = recv_buffer_idxs;
3698   new_local_rows = 0;
3699   for (i=0;i<n_recvs;i++) {
3700     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3701     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3702     ptr_idxs += olengths_idxs[i];
3703   }
3704   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3705   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3706   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3707 
3708   /* infer new local matrix type from received local matrices type */
3709   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3710   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3711   if (n_recvs) {
3712     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3713     ptr_idxs = recv_buffer_idxs;
3714     for (i=0;i<n_recvs;i++) {
3715       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3716         new_local_type_private = MATAIJ_PRIVATE;
3717         break;
3718       }
3719       ptr_idxs += olengths_idxs[i];
3720     }
3721     switch (new_local_type_private) {
3722       case MATDENSE_PRIVATE:
3723         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3724           new_local_type = MATSEQAIJ;
3725           bs = 1;
3726         } else { /* if I receive only 1 dense matrix */
3727           new_local_type = MATSEQDENSE;
3728           bs = 1;
3729         }
3730         break;
3731       case MATAIJ_PRIVATE:
3732         new_local_type = MATSEQAIJ;
3733         bs = 1;
3734         break;
3735       case MATBAIJ_PRIVATE:
3736         new_local_type = MATSEQBAIJ;
3737         break;
3738       case MATSBAIJ_PRIVATE:
3739         new_local_type = MATSEQSBAIJ;
3740         break;
3741       default:
3742         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3743         break;
3744     }
3745   } else { /* by default, new_local_type is seqdense */
3746     new_local_type = MATSEQDENSE;
3747     bs = 1;
3748   }
3749 
3750   /* create MATIS object if needed */
3751   if (reuse == MAT_INITIAL_MATRIX) {
3752     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3753     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3754   } else {
3755     /* it also destroys the local matrices */
3756     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3757   }
3758   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3759   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3760 
3761   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3762 
3763   /* Global to local map of received indices */
3764   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3765   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3766   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3767 
3768   /* restore attributes -> type of incoming data and its size */
3769   buf_size_idxs = 0;
3770   for (i=0;i<n_recvs;i++) {
3771     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3772     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3773     buf_size_idxs += (PetscInt)olengths_idxs[i];
3774   }
3775   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3776 
3777   /* set preallocation */
3778   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3779   if (!newisdense) {
3780     PetscInt *new_local_nnz=0;
3781 
3782     ptr_vals = recv_buffer_vals;
3783     ptr_idxs = recv_buffer_idxs_local;
3784     if (n_recvs) {
3785       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3786     }
3787     for (i=0;i<n_recvs;i++) {
3788       PetscInt j;
3789       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3790         for (j=0;j<*(ptr_idxs+1);j++) {
3791           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3792         }
3793       } else {
3794         /* TODO */
3795       }
3796       ptr_idxs += olengths_idxs[i];
3797     }
3798     if (new_local_nnz) {
3799       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3800       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3801       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3802       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3803       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3804       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3805     } else {
3806       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3807     }
3808     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3809   } else {
3810     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3811   }
3812 
3813   /* set values */
3814   ptr_vals = recv_buffer_vals;
3815   ptr_idxs = recv_buffer_idxs_local;
3816   for (i=0;i<n_recvs;i++) {
3817     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3818       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3819       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3820       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3821       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3822       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3823     } else {
3824       /* TODO */
3825     }
3826     ptr_idxs += olengths_idxs[i];
3827     ptr_vals += olengths_vals[i];
3828   }
3829   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3831   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3832   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3833   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3834   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3835 
3836 #if 0
3837   if (!restrict_comm) { /* check */
3838     Vec       lvec,rvec;
3839     PetscReal infty_error;
3840 
3841     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3842     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3843     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3844     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3845     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3846     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3847     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3848     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3849     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3850   }
3851 #endif
3852 
3853   /* assemble new additional is (if any) */
3854   if (nis) {
3855     PetscInt **temp_idxs,*count_is,j,psum;
3856 
3857     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3858     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3859     ptr_idxs = recv_buffer_idxs_is;
3860     psum = 0;
3861     for (i=0;i<n_recvs;i++) {
3862       for (j=0;j<nis;j++) {
3863         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3864         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3865         psum += plen;
3866         ptr_idxs += plen+1; /* shift pointer to received data */
3867       }
3868     }
3869     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3870     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3871     for (i=1;i<nis;i++) {
3872       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3873     }
3874     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3875     ptr_idxs = recv_buffer_idxs_is;
3876     for (i=0;i<n_recvs;i++) {
3877       for (j=0;j<nis;j++) {
3878         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3879         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3880         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3881         ptr_idxs += plen+1; /* shift pointer to received data */
3882       }
3883     }
3884     for (i=0;i<nis;i++) {
3885       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3886       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3887       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3888     }
3889     ierr = PetscFree(count_is);CHKERRQ(ierr);
3890     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3891     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3892   }
3893   /* free workspace */
3894   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3895   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3896   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3897   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3898   if (isdense) {
3899     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3900     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3901   } else {
3902     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3903   }
3904   if (nis) {
3905     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3906     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3907   }
3908   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3909   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3910   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3911   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3912   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3913   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3914   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3915   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3916   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3917   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3918   ierr = PetscFree(onodes);CHKERRQ(ierr);
3919   if (nis) {
3920     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3921     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3922     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3923   }
3924   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3925   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3926     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3927     for (i=0;i<nis;i++) {
3928       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3929     }
3930   }
3931   PetscFunctionReturn(0);
3932 }
3933 
3934 /* temporary hack into ksp private data structure */
3935 #include <petsc/private/kspimpl.h>
3936 
3937 #undef __FUNCT__
3938 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3939 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3940 {
3941   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3942   PC_IS                  *pcis = (PC_IS*)pc->data;
3943   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3944   MatNullSpace           CoarseNullSpace=NULL;
3945   ISLocalToGlobalMapping coarse_islg;
3946   IS                     coarse_is,*isarray;
3947   PetscInt               i,im_active=-1,active_procs=-1;
3948   PetscInt               nis,nisdofs,nisneu;
3949   PC                     pc_temp;
3950   PCType                 coarse_pc_type;
3951   KSPType                coarse_ksp_type;
3952   PetscBool              multilevel_requested,multilevel_allowed;
3953   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3954   Mat                    t_coarse_mat_is;
3955   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3956   PetscMPIInt            all_procs;
3957   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3958   PetscBool              compute_vecs = PETSC_FALSE;
3959   PetscScalar            *array;
3960   PetscErrorCode         ierr;
3961 
3962   PetscFunctionBegin;
3963   /* Assign global numbering to coarse dofs */
3964   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3965     PetscInt ocoarse_size;
3966     compute_vecs = PETSC_TRUE;
3967     ocoarse_size = pcbddc->coarse_size;
3968     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3969     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3970     /* see if we can avoid some work */
3971     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3972       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3973         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3974         coarse_reuse = PETSC_FALSE;
3975       } else { /* we can safely reuse already computed coarse matrix */
3976         coarse_reuse = PETSC_TRUE;
3977       }
3978     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3979       coarse_reuse = PETSC_FALSE;
3980     }
3981     /* reset any subassembling information */
3982     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3983     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3984   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3985     coarse_reuse = PETSC_TRUE;
3986   }
3987 
3988   /* count "active" (i.e. with positive local size) and "void" processes */
3989   im_active = !!(pcis->n);
3990   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3991   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3992   void_procs = all_procs-active_procs;
3993   csin_type_simple = PETSC_TRUE;
3994   redist = PETSC_FALSE;
3995   if (pcbddc->current_level && void_procs) {
3996     csin_ml = PETSC_TRUE;
3997     ncoarse_ml = void_procs;
3998     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3999     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4000       csin_ds = PETSC_TRUE;
4001       ncoarse_ds = pcbddc->redistribute_coarse;
4002       redist = PETSC_TRUE;
4003     } else {
4004       csin_ds = PETSC_TRUE;
4005       ncoarse_ds = active_procs;
4006       redist = PETSC_TRUE;
4007     }
4008   } else {
4009     csin_ml = PETSC_FALSE;
4010     ncoarse_ml = all_procs;
4011     if (void_procs) {
4012       csin_ds = PETSC_TRUE;
4013       ncoarse_ds = void_procs;
4014       csin_type_simple = PETSC_FALSE;
4015     } else {
4016       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4017         csin_ds = PETSC_TRUE;
4018         ncoarse_ds = pcbddc->redistribute_coarse;
4019         redist = PETSC_TRUE;
4020       } else {
4021         csin_ds = PETSC_FALSE;
4022         ncoarse_ds = all_procs;
4023       }
4024     }
4025   }
4026 
4027   /*
4028     test if we can go multilevel: three conditions must be satisfied:
4029     - we have not exceeded the number of levels requested
4030     - we can actually subassemble the active processes
4031     - we can find a suitable number of MPI processes where we can place the subassembled problem
4032   */
4033   multilevel_allowed = PETSC_FALSE;
4034   multilevel_requested = PETSC_FALSE;
4035   if (pcbddc->current_level < pcbddc->max_levels) {
4036     multilevel_requested = PETSC_TRUE;
4037     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4038       multilevel_allowed = PETSC_FALSE;
4039     } else {
4040       multilevel_allowed = PETSC_TRUE;
4041     }
4042   }
4043   /* determine number of process partecipating to coarse solver */
4044   if (multilevel_allowed) {
4045     ncoarse = ncoarse_ml;
4046     csin = csin_ml;
4047     redist = PETSC_FALSE;
4048   } else {
4049     ncoarse = ncoarse_ds;
4050     csin = csin_ds;
4051   }
4052 
4053   /* creates temporary l2gmap and IS for coarse indexes */
4054   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4055   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4056 
4057   /* creates temporary MATIS object for coarse matrix */
4058   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4059   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4060   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4061   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4062 #if 0
4063   {
4064     PetscViewer viewer;
4065     char filename[256];
4066     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4067     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4068     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4069     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4070     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4071   }
4072 #endif
4073   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4074   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4075   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4076   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4077   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4078 
4079   /* compute dofs splitting and neumann boundaries for coarse dofs */
4080   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4081     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4082     const PetscInt         *idxs;
4083     ISLocalToGlobalMapping tmap;
4084 
4085     /* create map between primal indices (in local representative ordering) and local primal numbering */
4086     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4087     /* allocate space for temporary storage */
4088     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4089     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4090     /* allocate for IS array */
4091     nisdofs = pcbddc->n_ISForDofsLocal;
4092     nisneu = !!pcbddc->NeumannBoundariesLocal;
4093     nis = nisdofs + nisneu;
4094     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4095     /* dofs splitting */
4096     for (i=0;i<nisdofs;i++) {
4097       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4098       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4099       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4100       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4101       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4102       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4103       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4104       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4105     }
4106     /* neumann boundaries */
4107     if (pcbddc->NeumannBoundariesLocal) {
4108       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4109       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4110       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4111       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4112       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4113       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4114       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4115       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4116     }
4117     /* free memory */
4118     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4119     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4120     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4121   } else {
4122     nis = 0;
4123     nisdofs = 0;
4124     nisneu = 0;
4125     isarray = NULL;
4126   }
4127   /* destroy no longer needed map */
4128   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4129 
4130   /* restrict on coarse candidates (if needed) */
4131   coarse_mat_is = NULL;
4132   if (csin) {
4133     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4134       if (redist) {
4135         PetscMPIInt rank;
4136         PetscInt    spc,n_spc_p1,dest[1],destsize;
4137 
4138         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4139         spc = active_procs/ncoarse;
4140         n_spc_p1 = active_procs%ncoarse;
4141         if (im_active) {
4142           destsize = 1;
4143           if (rank > n_spc_p1*(spc+1)-1) {
4144             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4145           } else {
4146             dest[0] = rank/(spc+1);
4147           }
4148         } else {
4149           destsize = 0;
4150         }
4151         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4152       } else if (csin_type_simple) {
4153         PetscMPIInt rank;
4154         PetscInt    issize,isidx;
4155 
4156         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4157         if (im_active) {
4158           issize = 1;
4159           isidx = (PetscInt)rank;
4160         } else {
4161           issize = 0;
4162           isidx = -1;
4163         }
4164         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4165       } else { /* get a suitable subassembling pattern from MATIS code */
4166         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4167       }
4168 
4169       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4170       if (!redist || ncoarse <= void_procs) {
4171         PetscInt ncoarse_cand,tissize,*nisindices;
4172         PetscInt *coarse_candidates;
4173         const PetscInt* tisindices;
4174 
4175         /* get coarse candidates' ranks in pc communicator */
4176         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4177         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4178         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4179           if (!coarse_candidates[i]) {
4180             coarse_candidates[ncoarse_cand++]=i;
4181           }
4182         }
4183         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4184 
4185 
4186         if (pcbddc->dbg_flag) {
4187           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4188           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4189           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4190           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4191           for (i=0;i<ncoarse_cand;i++) {
4192             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4193           }
4194           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4195           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4196         }
4197         /* shift the pattern on coarse candidates */
4198         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4199         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4200         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4201         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4202         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4203         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4204         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4205       }
4206       if (pcbddc->dbg_flag) {
4207         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4208         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4209         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4210         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4211       }
4212     }
4213     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4214     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4215   } else {
4216     if (pcbddc->dbg_flag) {
4217       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4218       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4219       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4220     }
4221     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4222     coarse_mat_is = t_coarse_mat_is;
4223   }
4224 
4225   /* create local to global scatters for coarse problem */
4226   if (compute_vecs) {
4227     PetscInt lrows;
4228     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4229     if (coarse_mat_is) {
4230       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4231     } else {
4232       lrows = 0;
4233     }
4234     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4235     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4236     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4237     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4238     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4239   }
4240   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4241   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4242 
4243   /* set defaults for coarse KSP and PC */
4244   if (multilevel_allowed) {
4245     coarse_ksp_type = KSPRICHARDSON;
4246     coarse_pc_type = PCBDDC;
4247   } else {
4248     coarse_ksp_type = KSPPREONLY;
4249     coarse_pc_type = PCREDUNDANT;
4250   }
4251 
4252   /* print some info if requested */
4253   if (pcbddc->dbg_flag) {
4254     if (!multilevel_allowed) {
4255       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4256       if (multilevel_requested) {
4257         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4258       } else if (pcbddc->max_levels) {
4259         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4260       }
4261       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4262     }
4263   }
4264 
4265   /* create the coarse KSP object only once with defaults */
4266   if (coarse_mat_is) {
4267     MatReuse coarse_mat_reuse;
4268     PetscViewer dbg_viewer = NULL;
4269     if (pcbddc->dbg_flag) {
4270       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4271       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4272     }
4273     if (!pcbddc->coarse_ksp) {
4274       char prefix[256],str_level[16];
4275       size_t len;
4276       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4277       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4278       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4279       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4280       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4281       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4282       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4283       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4284       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4285       /* prefix */
4286       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4287       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4288       if (!pcbddc->current_level) {
4289         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4290         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4291       } else {
4292         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4293         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4294         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4295         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4296         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4297         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4298       }
4299       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4300       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4301       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4302       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4303       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4304       /* allow user customization */
4305       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4306     }
4307     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4308     if (nisdofs) {
4309       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4310       for (i=0;i<nisdofs;i++) {
4311         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4312       }
4313     }
4314     if (nisneu) {
4315       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4316       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4317     }
4318 
4319     /* get some info after set from options */
4320     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4321     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4322     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4323     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4324     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4325       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4326       isbddc = PETSC_FALSE;
4327     }
4328     if (isredundant) {
4329       KSP inner_ksp;
4330       PC inner_pc;
4331       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4332       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4333       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4334     }
4335 
4336     /* assemble coarse matrix */
4337     if (coarse_reuse) {
4338       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4339       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4340       coarse_mat_reuse = MAT_REUSE_MATRIX;
4341     } else {
4342       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4343     }
4344     if (isbddc || isnn) {
4345       if (pcbddc->coarsening_ratio > 1) {
4346         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4347           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4348           if (pcbddc->dbg_flag) {
4349             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4350             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4351             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4352             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4353           }
4354         }
4355         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4356       } else {
4357         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4358         coarse_mat = coarse_mat_is;
4359       }
4360     } else {
4361       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4362     }
4363     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4364 
4365     /* propagate symmetry info of coarse matrix */
4366     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4367     if (pc->pmat->symmetric_set) {
4368       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4369     }
4370     if (pc->pmat->hermitian_set) {
4371       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4372     }
4373     if (pc->pmat->spd_set) {
4374       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4375     }
4376     /* set operators */
4377     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4378     if (pcbddc->dbg_flag) {
4379       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4380     }
4381   } else { /* processes non partecipating to coarse solver (if any) */
4382     coarse_mat = 0;
4383   }
4384   ierr = PetscFree(isarray);CHKERRQ(ierr);
4385 #if 0
4386   {
4387     PetscViewer viewer;
4388     char filename[256];
4389     sprintf(filename,"coarse_mat.m");
4390     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4391     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4392     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4393     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4394   }
4395 #endif
4396 
4397   /* Compute coarse null space (special handling by BDDC only) */
4398   if (pcbddc->NullSpace) {
4399     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4400   }
4401 
4402   if (pcbddc->coarse_ksp) {
4403     Vec crhs,csol;
4404     PetscBool ispreonly;
4405     if (CoarseNullSpace) {
4406       if (isbddc) {
4407         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4408       } else {
4409         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4410       }
4411     }
4412     /* setup coarse ksp */
4413     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4414     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4415     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4416     /* hack */
4417     if (!csol) {
4418       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4419     }
4420     if (!crhs) {
4421       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4422     }
4423     /* Check coarse problem if in debug mode or if solving with an iterative method */
4424     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4425     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4426       KSP       check_ksp;
4427       KSPType   check_ksp_type;
4428       PC        check_pc;
4429       Vec       check_vec,coarse_vec;
4430       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4431       PetscInt  its;
4432       PetscBool compute_eigs;
4433       PetscReal *eigs_r,*eigs_c;
4434       PetscInt  neigs;
4435       const char *prefix;
4436 
4437       /* Create ksp object suitable for estimation of extreme eigenvalues */
4438       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4439       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4440       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4441       if (ispreonly) {
4442         check_ksp_type = KSPPREONLY;
4443         compute_eigs = PETSC_FALSE;
4444       } else {
4445         check_ksp_type = KSPGMRES;
4446         compute_eigs = PETSC_TRUE;
4447       }
4448       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4449       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4450       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4451       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4452       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4453       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4454       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4455       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4456       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4457       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4458       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4459       /* create random vec */
4460       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4461       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4462       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4463       if (CoarseNullSpace) {
4464         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4465       }
4466       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4467       /* solve coarse problem */
4468       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4469       if (CoarseNullSpace) {
4470         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4471       }
4472       /* set eigenvalue estimation if preonly has not been requested */
4473       if (compute_eigs) {
4474         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4475         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4476         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4477         lambda_max = eigs_r[neigs-1];
4478         lambda_min = eigs_r[0];
4479         if (pcbddc->use_coarse_estimates) {
4480           if (lambda_max>lambda_min) {
4481             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4482             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4483           }
4484         }
4485       }
4486 
4487       /* check coarse problem residual error */
4488       if (pcbddc->dbg_flag) {
4489         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4490         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4491         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4492         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4493         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4494         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4495         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4496         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4497         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4498         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4499         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4500         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4501         if (compute_eigs) {
4502           PetscReal lambda_max_s,lambda_min_s;
4503           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4504           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4505           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4506           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4507           for (i=0;i<neigs;i++) {
4508             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4509           }
4510         }
4511         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4512         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4513       }
4514       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4515       if (compute_eigs) {
4516         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4517         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4518       }
4519     }
4520   }
4521   /* print additional info */
4522   if (pcbddc->dbg_flag) {
4523     /* waits until all processes reaches this point */
4524     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4525     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4526     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4527   }
4528 
4529   /* free memory */
4530   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4531   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4532   PetscFunctionReturn(0);
4533 }
4534 
4535 #undef __FUNCT__
4536 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4537 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4538 {
4539   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4540   PC_IS*         pcis = (PC_IS*)pc->data;
4541   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4542   PetscInt       i,local_size,coarse_size=0;
4543   PetscInt       *local_primal_indices=NULL;
4544   PetscInt       *t_local_primal_indices=NULL;
4545   PetscErrorCode ierr;
4546 
4547   PetscFunctionBegin;
4548   /* Compute global number of coarse dofs */
4549   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4550     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4551   }
4552   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr);
4553   ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr);
4554   local_size = 0;
4555   for (i=0;i<pcbddc->local_primal_size_cc;i++) {
4556     PetscInt j;
4557     for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j;
4558   }
4559   ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr);
4560   if (local_size != pcbddc->local_primal_size) {
4561     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4562   }
4563 
4564   /* check numbering */
4565   if (pcbddc->dbg_flag) {
4566     PetscScalar coarsesum,*array;
4567     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4568 
4569     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4570     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4571     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4572     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4573     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4574     for (i=0;i<pcbddc->local_primal_size;i++) {
4575       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4576     }
4577     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4578     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4579     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4580     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4581     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4582     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4583     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4584     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4585     for (i=0;i<pcis->n;i++) {
4586       if (array[i] == 1.0) {
4587         set_error = PETSC_TRUE;
4588         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4589       }
4590     }
4591     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4592     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4593     for (i=0;i<pcis->n;i++) {
4594       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4595     }
4596     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4597     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4598     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4599     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4600     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4601     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4602     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4603       PetscInt *gidxs;
4604 
4605       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4606       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4607       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4608       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4609       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4610       for (i=0;i<pcbddc->local_primal_size;i++) {
4611         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4612       }
4613       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4614       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4615     }
4616     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4617     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4618   }
4619   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4620   /* get back data */
4621   *coarse_size_n = coarse_size;
4622   *local_primal_indices_n = local_primal_indices;
4623   PetscFunctionReturn(0);
4624 }
4625 
4626 #undef __FUNCT__
4627 #define __FUNCT__ "PCBDDCGlobalToLocal"
4628 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4629 {
4630   IS             localis_t;
4631   PetscInt       i,lsize,*idxs,n;
4632   PetscScalar    *vals;
4633   PetscErrorCode ierr;
4634 
4635   PetscFunctionBegin;
4636   /* get indices in local ordering exploiting local to global map */
4637   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4638   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4639   for (i=0;i<lsize;i++) vals[i] = 1.0;
4640   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4641   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4642   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4643   if (idxs) { /* multilevel guard */
4644     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4645   }
4646   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4647   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4648   ierr = PetscFree(vals);CHKERRQ(ierr);
4649   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4650   /* now compute set in local ordering */
4651   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4652   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4653   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4654   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4655   for (i=0,lsize=0;i<n;i++) {
4656     if (PetscRealPart(vals[i]) > 0.5) {
4657       lsize++;
4658     }
4659   }
4660   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4661   for (i=0,lsize=0;i<n;i++) {
4662     if (PetscRealPart(vals[i]) > 0.5) {
4663       idxs[lsize++] = i;
4664     }
4665   }
4666   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4667   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4668   *localis = localis_t;
4669   PetscFunctionReturn(0);
4670 }
4671 
4672 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4673 #undef __FUNCT__
4674 #define __FUNCT__ "PCBDDCMatMult_Private"
4675 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4676 {
4677   PCBDDCChange_ctx change_ctx;
4678   PetscErrorCode   ierr;
4679 
4680   PetscFunctionBegin;
4681   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4682   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4683   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4684   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4685   PetscFunctionReturn(0);
4686 }
4687 
4688 #undef __FUNCT__
4689 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4690 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4691 {
4692   PCBDDCChange_ctx change_ctx;
4693   PetscErrorCode   ierr;
4694 
4695   PetscFunctionBegin;
4696   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4697   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4698   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4699   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4700   PetscFunctionReturn(0);
4701 }
4702 
4703 #undef __FUNCT__
4704 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4705 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4706 {
4707   PC_IS               *pcis=(PC_IS*)pc->data;
4708   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4709   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4710   Mat                 S_j;
4711   PetscInt            *used_xadj,*used_adjncy;
4712   PetscBool           free_used_adj;
4713   PetscErrorCode      ierr;
4714 
4715   PetscFunctionBegin;
4716   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4717   free_used_adj = PETSC_FALSE;
4718   if (pcbddc->sub_schurs_layers == -1) {
4719     used_xadj = NULL;
4720     used_adjncy = NULL;
4721   } else {
4722     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4723       used_xadj = pcbddc->mat_graph->xadj;
4724       used_adjncy = pcbddc->mat_graph->adjncy;
4725     } else if (pcbddc->computed_rowadj) {
4726       used_xadj = pcbddc->mat_graph->xadj;
4727       used_adjncy = pcbddc->mat_graph->adjncy;
4728     } else {
4729       PetscBool      flg_row=PETSC_FALSE;
4730       const PetscInt *xadj,*adjncy;
4731       PetscInt       nvtxs;
4732 
4733       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4734       if (flg_row) {
4735         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4736         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4737         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4738         free_used_adj = PETSC_TRUE;
4739       } else {
4740         pcbddc->sub_schurs_layers = -1;
4741         used_xadj = NULL;
4742         used_adjncy = NULL;
4743       }
4744       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4745     }
4746   }
4747 
4748   /* setup sub_schurs data */
4749   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4750   if (!sub_schurs->use_mumps) {
4751     /* pcbddc->ksp_D up to date only if not using MUMPS */
4752     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4753     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4754   } else {
4755     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4756     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4757   }
4758   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4759 
4760   /* free adjacency */
4761   if (free_used_adj) {
4762     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4763   }
4764   PetscFunctionReturn(0);
4765 }
4766 
4767 #undef __FUNCT__
4768 #define __FUNCT__ "PCBDDCInitSubSchurs"
4769 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4770 {
4771   PC_IS               *pcis=(PC_IS*)pc->data;
4772   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4773   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4774   PCBDDCGraph         graph;
4775   PetscErrorCode      ierr;
4776 
4777   PetscFunctionBegin;
4778   /* attach interface graph for determining subsets */
4779   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4780     IS       verticesIS,verticescomm;
4781     PetscInt vsize,*idxs;
4782 
4783     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4784     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
4785     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4786     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
4787     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4788     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4789     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4790     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4791     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
4792     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
4793     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4794 /*
4795     if (pcbddc->dbg_flag) {
4796       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4797     }
4798 */
4799   } else {
4800     graph = pcbddc->mat_graph;
4801   }
4802 
4803   /* sub_schurs init */
4804   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4805 
4806   /* free graph struct */
4807   if (pcbddc->sub_schurs_rebuild) {
4808     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4809   }
4810   PetscFunctionReturn(0);
4811 }
4812