xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision c0962df8a5994a6ff07e429209b5729e79c2950a)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31   if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
32     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
33   }
34 
35   /* max size of subsets */
36   mss = 0;
37   for (i=0;i<sub_schurs->n_subs;i++) {
38     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
39       PetscInt subset_size;
40       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
41       mss = PetscMax(mss,subset_size);
42     }
43   }
44 
45   /* min/max and threshold */
46   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
47   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
48   nmax = PetscMax(nmin,nmax);
49   if (pcbddc->adaptive_threshold > 1.0) {
50     thresh = 1.0/pcbddc->adaptive_threshold;
51   } else {
52     thresh = 1.0;
53   }
54 
55   /* allocate lapack workspace */
56   cum = cum2 = 0;
57   maxneigs = 0;
58   for (i=0;i<sub_schurs->n_subs;i++) {
59     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
60       PetscInt n,subset_size;
61       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
62       n = PetscMin(subset_size,nmax);
63       cum += subset_size*n;
64       cum2 += n;
65       maxneigs = PetscMax(maxneigs,n);
66     }
67   }
68 
69   if (mss) {
70     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
71       PetscBLASInt B_itype = 1;
72       PetscBLASInt B_N = mss;
73       PetscScalar  zero = 0.0;
74       PetscScalar  eps = 0.0; /* dlamch? */
75 
76       B_lwork = -1;
77       S = NULL;
78       St = NULL;
79       eigs = NULL;
80       eigv = NULL;
81       B_iwork = NULL;
82       B_ifail = NULL;
83       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
84 #if defined(PETSC_USE_COMPLEX)
85       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
86 #else
87       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
88 #endif
89       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
90       ierr = PetscFPTrapPop();CHKERRQ(ierr);
91     } else {
92         /* TODO */
93     }
94   } else {
95     lwork = 0;
96   }
97 
98   nv = 0;
99   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
100     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
101   }
102   ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr);
103   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
104                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
105 #if defined(PETSC_USE_COMPLEX)
106   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
107 #endif
108   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
109   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
110                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
111                       nv+cum,&pcbddc->adaptive_constraints_idxs,
112                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
113   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
114 
115   maxneigs = 0;
116   cum = cum2 = cumarray = 0;
117   if (sub_schurs->is_Ej_com) {
118     const PetscInt *idxs;
119 
120     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
121     for (cum=0;cum<nv;cum++) {
122       pcbddc->adaptive_constraints_n[cum] = 1;
123       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
124       pcbddc->adaptive_constraints_ptrs[cum] = cum;
125       pcbddc->adaptive_constraints_data[cum] = 1.0;
126     }
127     cum2 = cum;
128     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
129   }
130 
131   if (mss) { /* multilevel */
132     if (pcbddc->use_deluxe_scaling) {
133       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
134     } else {
135       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
136     }
137     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
138   }
139 
140   for (i=0;i<sub_schurs->n_subs;i++) {
141     PetscInt j,subset_size;
142 
143     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
144     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
145       const PetscInt *idxs;
146       PetscScalar    one = 1.0,zero=0.0;
147       PetscBLASInt   B_N;
148 
149       /* S should be copied since we need it for deluxe scaling */
150       if (sub_schurs->is_hermitian) {
151         PetscInt j;
152         for (j=0;j<subset_size;j++) {
153           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
154         }
155         for (j=0;j<subset_size;j++) {
156           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
157         }
158       } else {
159         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
160         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
161       }
162       /* is always this the right matrix? */
163       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
164 
165       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
166       /* St = Starray+cumarray; */
167 
168       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
169         PetscBLASInt B_itype = 1;
170         PetscBLASInt B_IL = 1, B_IU;
171         PetscScalar  eps = -1.0; /* dlamch? */
172         PetscInt     nmin_s;
173 
174         /* ask for eigenvalues lower than thresh */
175         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
176         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
177         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
178 #if defined(PETSC_USE_COMPLEX)
179         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
180 #else
181         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
182 #endif
183         ierr = PetscFPTrapPop();CHKERRQ(ierr);
184         if (B_ierr) {
185           if (B_ierr < 0 ) {
186             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
187           } else if (B_ierr <= B_N) {
188             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
189           } else {
190             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
191           }
192         }
193 
194         if (B_neigs > nmax) {
195           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax);
196           B_neigs = nmax;
197         }
198 
199         nmin_s = PetscMin(nmin,B_N);
200         if (B_neigs < nmin_s) {
201           PetscBLASInt B_neigs2;
202 
203           B_IL = B_neigs + 1;
204           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
205           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
206           if (sub_schurs->is_hermitian) {
207             PetscInt j;
208             for (j=0;j<subset_size;j++) {
209               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
210             }
211             for (j=0;j<subset_size;j++) {
212               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
213             }
214           } else {
215             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
216             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
217           }
218           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
219 #if defined(PETSC_USE_COMPLEX)
220           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
221 #else
222           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
223 #endif
224           ierr = PetscFPTrapPop();CHKERRQ(ierr);
225           B_neigs += B_neigs2;
226         }
227         if (B_ierr) {
228           if (B_ierr < 0 ) {
229             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
230           } else if (B_ierr <= B_N) {
231             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
232           } else {
233             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
234           }
235         }
236         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
237         for (j=0;j<B_neigs;j++) {
238           if (eigs[j] == 0.0) {
239             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
240           } else {
241             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
242           }
243         }
244       } else {
245           /* TODO */
246       }
247       maxneigs = PetscMax(B_neigs,maxneigs);
248       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
249 
250       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
251       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N));
252       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
253 #if 0
254       PetscInt ii;
255       for (ii=0;ii<B_neigs;ii++) {
256         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N);
257         for (j=0;j<B_N;j++) {
258           PetscPrintf(PETSC_COMM_SELF,"[%d]       %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]);
259         }
260       }
261 #endif
262       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
263       for (j=0;j<B_neigs;j++) {
264 #if 0
265         {
266           PetscBLASInt Blas_N,Blas_one = 1.0;
267           PetscScalar norm;
268           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
269           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
270           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
271             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
272           } else {
273             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
274           }
275           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
276         }
277 #endif
278         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
279         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
280         cum2 += subset_size;
281       }
282       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
283     }
284     /* shift for next computation */
285     cumarray += subset_size*subset_size;
286   }
287   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
288   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
289 
290   if (mss) {
291     if (pcbddc->use_deluxe_scaling) {
292       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
293     } else {
294       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
295     }
296     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
297   }
298   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
299 #if defined(PETSC_USE_COMPLEX)
300   ierr = PetscFree(rwork);CHKERRQ(ierr);
301 #endif
302   if (pcbddc->dbg_flag) {
303     PetscInt maxneigs_r;
304     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
305     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
306   }
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "PCBDDCSetUpSolvers"
312 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
313 {
314   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
315   PetscScalar    *coarse_submat_vals;
316   PetscErrorCode ierr;
317 
318   PetscFunctionBegin;
319   /* Setup local scatters R_to_B and (optionally) R_to_D */
320   /* PCBDDCSetUpLocalWorkVectors should be called first! */
321   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
322 
323   /* Setup local neumann solver ksp_R */
324   /* PCBDDCSetUpLocalScatters should be called first! */
325   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
326 
327   /* Change global null space passed in by the user if change of basis has been requested */
328   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
329     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
330   }
331 
332   /*
333      Setup local correction and local part of coarse basis.
334      Gives back the dense local part of the coarse matrix in column major ordering
335   */
336   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
337 
338   /* Compute total number of coarse nodes and setup coarse solver */
339   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
340 
341   /* free */
342   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 #undef __FUNCT__
347 #define __FUNCT__ "PCBDDCResetCustomization"
348 PetscErrorCode PCBDDCResetCustomization(PC pc)
349 {
350   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
351   PetscErrorCode ierr;
352 
353   PetscFunctionBegin;
354   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
355   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
356   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
357   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
358   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
359   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
360   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
361   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
362   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
363   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
364   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
365   PetscFunctionReturn(0);
366 }
367 
368 #undef __FUNCT__
369 #define __FUNCT__ "PCBDDCResetTopography"
370 PetscErrorCode PCBDDCResetTopography(PC pc)
371 {
372   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
373   PetscErrorCode ierr;
374 
375   PetscFunctionBegin;
376   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
377   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
378   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
379   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
380   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
381   PetscFunctionReturn(0);
382 }
383 
384 #undef __FUNCT__
385 #define __FUNCT__ "PCBDDCResetSolvers"
386 PetscErrorCode PCBDDCResetSolvers(PC pc)
387 {
388   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
389   PetscScalar    *array;
390   PetscErrorCode ierr;
391 
392   PetscFunctionBegin;
393   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
394   if (pcbddc->coarse_phi_B) {
395     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
396     ierr = PetscFree(array);CHKERRQ(ierr);
397   }
398   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
399   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
400   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
402   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
403   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
404   if (pcbddc->local_auxmat2) {
405     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
406     ierr = PetscFree(array);CHKERRQ(ierr);
407   }
408   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
409   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
410   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
411   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
412   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
413   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
414   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
415   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
416   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
417   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
418   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
419   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
420   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
421   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
422   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
423   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
429 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
430 {
431   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
432   PC_IS          *pcis = (PC_IS*)pc->data;
433   VecType        impVecType;
434   PetscInt       n_constraints,n_R,old_size;
435   PetscErrorCode ierr;
436 
437   PetscFunctionBegin;
438   if (!pcbddc->ConstraintMatrix) {
439     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
440   }
441   /* get sizes */
442   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
443   n_R = pcis->n-pcbddc->n_actual_vertices;
444   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
445   /* local work vectors (try to avoid unneeded work)*/
446   /* R nodes */
447   old_size = -1;
448   if (pcbddc->vec1_R) {
449     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
450   }
451   if (n_R != old_size) {
452     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
453     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
454     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
455     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
456     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
457     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
458   }
459   /* local primal dofs */
460   old_size = -1;
461   if (pcbddc->vec1_P) {
462     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
463   }
464   if (pcbddc->local_primal_size != old_size) {
465     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
466     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
467     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
468     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
469   }
470   /* local explicit constraints */
471   old_size = -1;
472   if (pcbddc->vec1_C) {
473     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
474   }
475   if (n_constraints && n_constraints != old_size) {
476     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
477     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
478     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
479     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
480   }
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "PCBDDCSetUpCorrection"
486 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
487 {
488   PetscErrorCode         ierr;
489   /* pointers to pcis and pcbddc */
490   PC_IS*                 pcis = (PC_IS*)pc->data;
491   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
492   /* submatrices of local problem */
493   Mat                    A_RV,A_VR,A_VV;
494   /* submatrices of local coarse problem */
495   Mat                    S_VV,S_CV,S_VC,S_CC;
496   /* working matrices */
497   Mat                    C_CR;
498   /* additional working stuff */
499   PC                     pc_R;
500   Mat                    F;
501   PetscBool              isLU,isCHOL,isILU;
502 
503   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
504   PetscScalar            *work;
505   PetscInt               *idx_V_B;
506   PetscInt               n,n_vertices,n_constraints;
507   PetscInt               i,n_R,n_D,n_B;
508   PetscBool              unsymmetric_check;
509   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
510   MatType                impMatType;
511   /* some shortcuts to scalars */
512   PetscScalar            one=1.0,m_one=-1.0;
513 
514   PetscFunctionBegin;
515   /* get number of vertices (corners plus constraints with change of basis)
516      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
517   n_vertices = pcbddc->n_actual_vertices;
518   n_constraints = pcbddc->local_primal_size-n_vertices;
519   /* Set Non-overlapping dimensions */
520   n_B = pcis->n_B; n_D = pcis->n - n_B;
521   n_R = pcis->n-n_vertices;
522 
523   /* Set types for local objects needed by BDDC precondtioner */
524   impMatType = MATSEQDENSE;
525 
526   /* vertices in boundary numbering */
527   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
528   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
529   if (i != n_vertices) {
530     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
531   }
532 
533   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
534   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
535   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
536   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
537   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
538   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
539   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
540   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
541   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
542   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
543 
544   unsymmetric_check = PETSC_FALSE;
545   /* allocate workspace */
546   n = 0;
547   if (n_constraints) {
548     n += n_R*n_constraints;
549   }
550   if (n_vertices) {
551     n = PetscMax(2*n_R*n_vertices,n);
552   }
553   if (!pcbddc->issym) {
554     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
555     unsymmetric_check = PETSC_TRUE;
556   }
557   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
558 
559   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
560   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
561   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
562   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
563   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
564   if (isLU || isILU || isCHOL) {
565     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
566   } else {
567     F = NULL;
568   }
569 
570   /* Precompute stuffs needed for preprocessing and application of BDDC*/
571   if (n_constraints) {
572     Mat M1,M2,M3;
573     IS  is_aux;
574     /* see if we can save some allocations */
575     if (pcbddc->local_auxmat2) {
576       PetscInt on_R,on_constraints;
577       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
578       if (on_R != n_R || on_constraints != n_constraints) {
579         PetscScalar *marray;
580 
581         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
582         ierr = PetscFree(marray);CHKERRQ(ierr);
583         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
584         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
585       }
586     }
587     /* auxiliary matrices */
588     if (!pcbddc->local_auxmat2) {
589       PetscScalar *marray;
590 
591       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
592       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
593       marray += n_R*n_constraints;
594       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
595     }
596 
597     /* Extract constraints on R nodes: C_{CR}  */
598     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
599     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
600     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
601 
602     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
603     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
604     for (i=0;i<n_constraints;i++) {
605       const PetscScalar *row_cmat_values;
606       const PetscInt    *row_cmat_indices;
607       PetscInt          size_of_constraint,j;
608 
609       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
610       for (j=0;j<size_of_constraint;j++) {
611         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
612       }
613       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
614     }
615     if (F) {
616       Mat B;
617 
618       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
619       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
620       ierr = MatDestroy(&B);CHKERRQ(ierr);
621     } else {
622       PetscScalar *xarray;
623       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
624       for (i=0;i<n_constraints;i++) {
625         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
626         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
627         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
628         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
629         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
630       }
631       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
632     }
633 
634     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
635     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
636     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
637     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
638     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
639     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
640     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
641     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
642     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
643     ierr = MatDestroy(&M2);CHKERRQ(ierr);
644     ierr = MatDestroy(&M3);CHKERRQ(ierr);
645     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
646     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
647     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
648     ierr = MatDestroy(&M1);CHKERRQ(ierr);
649   }
650   /* Get submatrices from subdomain matrix */
651   if (n_vertices) {
652     Mat       newmat;
653     IS        is_aux;
654     PetscInt  ibs,mbs;
655     PetscBool issbaij;
656 
657     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
658     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
659     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
660     if (ibs != mbs) { /* need to convert to SEQAIJ */
661       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
662       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
663       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
664       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
665       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
666     } else {
667       /* this is safe */
668       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
669       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
670       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
671         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
672         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
673         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
674         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
675         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
676       } else {
677         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
678         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
679       }
680     }
681     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
682   }
683 
684   /* Matrix of coarse basis functions (local) */
685   if (pcbddc->coarse_phi_B) {
686     PetscInt on_B,on_primal,on_D=n_D;
687     if (pcbddc->coarse_phi_D) {
688       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
689     }
690     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
691     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
692       PetscScalar *marray;
693 
694       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
695       ierr = PetscFree(marray);CHKERRQ(ierr);
696       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
697       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
698       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
699       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
700     }
701   }
702 
703   if (!pcbddc->coarse_phi_B) {
704     PetscScalar *marray;
705 
706     n = n_B*pcbddc->local_primal_size;
707     if (pcbddc->switch_static || pcbddc->dbg_flag) {
708       n += n_D*pcbddc->local_primal_size;
709     }
710     if (!pcbddc->issym) {
711       n *= 2;
712     }
713     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
714     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
715     n = n_B*pcbddc->local_primal_size;
716     if (pcbddc->switch_static || pcbddc->dbg_flag) {
717       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
718       n += n_D*pcbddc->local_primal_size;
719     }
720     if (!pcbddc->issym) {
721       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
722       if (pcbddc->switch_static || pcbddc->dbg_flag) {
723         n = n_B*pcbddc->local_primal_size;
724         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
725       }
726     } else {
727       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
728       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
729       if (pcbddc->switch_static || pcbddc->dbg_flag) {
730         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
731         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
732       }
733     }
734   }
735   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
736   /* vertices */
737   if (n_vertices) {
738 
739     if (n_R) {
740       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
741       PetscBLASInt B_N,B_one = 1;
742       PetscScalar  *x,*y;
743 
744       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
746       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
747       if (F) {
748         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
749       } else {
750         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
751         for (i=0;i<n_vertices;i++) {
752           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
753           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
754           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
755           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
756           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
757         }
758         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
759       }
760       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
761       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
762       if (n_constraints) {
763         Mat B;
764         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
765         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
766         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
767         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
768         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
769         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
770         ierr = MatDestroy(&B);CHKERRQ(ierr);
771       }
772       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
773       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
774       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
775       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
776       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
777       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
778       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
779       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
780       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
781       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
782       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
783       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
784     } else {
785       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
786       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
787     }
788     /* coarse basis functions */
789     for (i=0;i<n_vertices;i++) {
790       PetscScalar *y;
791 
792       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
793       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
794       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
795       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
796       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
797       y[n_B*i+idx_V_B[i]] = 1.0;
798       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
799       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
800 
801       if (pcbddc->switch_static || pcbddc->dbg_flag) {
802         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
803         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
804         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
805         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
806         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
807         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
808       }
809       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
810     }
811     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
812     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
813   }
814 
815   if (n_constraints) {
816     Mat B;
817 
818     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
819     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
820     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
821     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
822     if (n_vertices) {
823       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
824     }
825     ierr = MatDestroy(&B);CHKERRQ(ierr);
826     /* coarse basis functions */
827     for (i=0;i<n_constraints;i++) {
828       PetscScalar *y;
829 
830       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
831       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
832       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
833       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
834       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
836       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
837 
838       if (pcbddc->switch_static || pcbddc->dbg_flag) {
839         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
840         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
841         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
842         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
843         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
844         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
845       }
846       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
847     }
848   }
849 
850   /* compute other basis functions for non-symmetric problems */
851   if (!pcbddc->issym) {
852     Mat B,X;
853 
854     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
855 
856     if (n_constraints) {
857       Mat S_CCT,B_C;
858 
859       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
860       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
861       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
862       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
863       if (n_vertices) {
864         Mat B_V,S_VCT;
865 
866         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
867         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
868         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
869         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
870         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
871       }
872       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
873     }
874     if (n_vertices && n_R) {
875       Mat          A_VRT;
876       PetscBLASInt B_N,B_one = 1;
877 
878       if (!n_constraints) { /* if there are no constraints, reset work */
879         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
880       }
881       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
882       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
883       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
884       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
885       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
886     }
887 
888     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
889     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
890       for (i=0;i<pcbddc->local_primal_size;i++) {
891         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
892         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
893         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
894         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
895         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
896       }
897     } else {
898       for (i=0;i<pcbddc->local_primal_size;i++) {
899         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
900         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
901         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
902         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
903         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
904       }
905     }
906     ierr = MatDestroy(&B);CHKERRQ(ierr);
907     /* coarse basis functions */
908     for (i=0;i<pcbddc->local_primal_size;i++) {
909       PetscScalar *y;
910 
911       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
912       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
913       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
914       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916       if (i<n_vertices) {
917         y[n_B*i+idx_V_B[i]] = 1.0;
918       }
919       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
920       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
921 
922       if (pcbddc->switch_static || pcbddc->dbg_flag) {
923         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
924         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
925         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
927         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
928         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
929       }
930       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
931     }
932     ierr = MatDestroy(&X);CHKERRQ(ierr);
933   }
934 
935   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
936   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
937   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
938   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
939   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
940   /* Checking coarse_sub_mat and coarse basis functios */
941   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
942   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
943   if (pcbddc->dbg_flag) {
944     Mat         coarse_sub_mat;
945     Mat         AUXMAT,TM1,TM2,TM3,TM4;
946     Mat         coarse_phi_D,coarse_phi_B;
947     Mat         coarse_psi_D,coarse_psi_B;
948     Mat         A_II,A_BB,A_IB,A_BI;
949     MatType     checkmattype=MATSEQAIJ;
950     PetscReal   real_value;
951 
952     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
953     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
954     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
955     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
956     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
957     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
958     if (unsymmetric_check) {
959       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
960       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
961     }
962     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
963 
964     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
965     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
966     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
967     if (unsymmetric_check) {
968       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
969       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
970       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
971       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
972       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
973       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
974       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
975       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
976       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
977       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
978       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
979       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
980     } else {
981       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
982       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
983       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
984       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
985       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
986       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
987       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
988       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
989     }
990     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
991     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
992     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
993     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
994     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
995     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
996 
997     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
998     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
999     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1000     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1001     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1002     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1003     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1004     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1005     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1006     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1007     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1008     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1009     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1010     if (unsymmetric_check) {
1011       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1012       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1013     }
1014     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1015   }
1016 
1017   /* free memory */
1018   ierr = PetscFree(work);CHKERRQ(ierr);
1019   if (n_vertices) {
1020     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1021   }
1022   if (n_constraints) {
1023     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1024   }
1025   /* get back data */
1026   *coarse_submat_vals_n = coarse_submat_vals;
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1032 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B)
1033 {
1034   Mat            *work_mat;
1035   IS             isrow_s,iscol_s;
1036   PetscBool      rsorted,csorted;
1037   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1038   PetscErrorCode ierr;
1039 
1040   PetscFunctionBegin;
1041   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1042   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1043   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1044   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1045 
1046   if (!rsorted) {
1047     const PetscInt *idxs;
1048     PetscInt *idxs_sorted,i;
1049 
1050     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1051     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1052     for (i=0;i<rsize;i++) {
1053       idxs_perm_r[i] = i;
1054     }
1055     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1056     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1057     for (i=0;i<rsize;i++) {
1058       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1059     }
1060     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1061     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1062   } else {
1063     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1064     isrow_s = isrow;
1065   }
1066 
1067   if (!csorted) {
1068     if (isrow == iscol) {
1069       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1070       iscol_s = isrow_s;
1071     } else {
1072       const PetscInt *idxs;
1073       PetscInt *idxs_sorted,i;
1074 
1075       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1076       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1077       for (i=0;i<csize;i++) {
1078         idxs_perm_c[i] = i;
1079       }
1080       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1081       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1082       for (i=0;i<csize;i++) {
1083         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1084       }
1085       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1086       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1087     }
1088   } else {
1089     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1090     iscol_s = iscol;
1091   }
1092 
1093   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr);
1094 
1095   if (!rsorted || !csorted) {
1096     Mat      new_mat;
1097     IS       is_perm_r,is_perm_c;
1098 
1099     if (!rsorted) {
1100       PetscInt *idxs_r,i;
1101       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1102       for (i=0;i<rsize;i++) {
1103         idxs_r[idxs_perm_r[i]] = i;
1104       }
1105       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1106       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1107     } else {
1108       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1109     }
1110     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1111 
1112     if (!csorted) {
1113       if (isrow_s == iscol_s) {
1114         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1115         is_perm_c = is_perm_r;
1116       } else {
1117         PetscInt *idxs_c,i;
1118         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1119         for (i=0;i<csize;i++) {
1120           idxs_c[idxs_perm_c[i]] = i;
1121         }
1122         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1123         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1124       }
1125     } else {
1126       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1127     }
1128     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1129 
1130     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1131     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1132     work_mat[0] = new_mat;
1133     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1134     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1135   }
1136 
1137   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1138   *B = work_mat[0];
1139   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1140   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1141   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1147 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1148 {
1149   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1150   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1151   Mat            new_mat;
1152   IS             is_local,is_global;
1153   PetscInt       local_size;
1154   PetscBool      isseqaij;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1159   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1160   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1161   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1162   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1163   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr);
1164   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1165 
1166   /* check */
1167   if (pcbddc->dbg_flag) {
1168     Vec       x,x_change;
1169     PetscReal error;
1170 
1171     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1172     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1173     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1174     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1175     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1176     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1177     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1178     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1179     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1180     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1181     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1182     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1183     ierr = VecDestroy(&x);CHKERRQ(ierr);
1184     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1185   }
1186 
1187   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1188   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1189   if (isseqaij) {
1190     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1191   } else {
1192     Mat work_mat;
1193     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1194     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1195     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1196   }
1197   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1198   /*
1199   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1200   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1201   */
1202   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 #undef __FUNCT__
1207 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1208 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1209 {
1210   PC_IS*         pcis = (PC_IS*)(pc->data);
1211   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1212   IS             is_aux1,is_aux2;
1213   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1214   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1215   PetscInt       vbs,bs;
1216   PetscBT        bitmask;
1217   PetscErrorCode ierr;
1218 
1219   PetscFunctionBegin;
1220   /*
1221     No need to setup local scatters if
1222       - primal space is unchanged
1223         AND
1224       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1225         AND
1226       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1227   */
1228   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1229     PetscFunctionReturn(0);
1230   }
1231   /* destroy old objects */
1232   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1233   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1234   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1235   /* Set Non-overlapping dimensions */
1236   n_B = pcis->n_B; n_D = pcis->n - n_B;
1237   n_vertices = pcbddc->n_actual_vertices;
1238   /* create auxiliary bitmask */
1239   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1240   for (i=0;i<n_vertices;i++) {
1241     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1242   }
1243 
1244   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1245   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1246   for (i=0, n_R=0; i<pcis->n; i++) {
1247     if (!PetscBTLookup(bitmask,i)) {
1248       idx_R_local[n_R] = i;
1249       n_R++;
1250     }
1251   }
1252 
1253   /* Block code */
1254   vbs = 1;
1255   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1256   if (bs>1 && !(n_vertices%bs)) {
1257     PetscBool is_blocked = PETSC_TRUE;
1258     PetscInt  *vary;
1259     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1260     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1261     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1262     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1263     for (i=0; i<n_vertices; i++) {
1264       if (vary[i]!=0 && vary[i]!=bs) {
1265         is_blocked = PETSC_FALSE;
1266         break;
1267       }
1268     }
1269     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1270       vbs = bs;
1271       for (i=0;i<n_R/vbs;i++) {
1272         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1273       }
1274     }
1275     ierr = PetscFree(vary);CHKERRQ(ierr);
1276   }
1277   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1278   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1279 
1280   /* print some info if requested */
1281   if (pcbddc->dbg_flag) {
1282     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1283     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1284     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1285     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1286     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1287     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1288     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1289     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1290   }
1291 
1292   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1293   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1294   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1295   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1296   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1297   for (i=0; i<n_D; i++) {
1298     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1299   }
1300   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1301   for (i=0, j=0; i<n_R; i++) {
1302     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1303       aux_array1[j++] = i;
1304     }
1305   }
1306   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1307   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1308   for (i=0, j=0; i<n_B; i++) {
1309     if (!PetscBTLookup(bitmask,is_indices[i])) {
1310       aux_array2[j++] = i;
1311     }
1312   }
1313   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1314   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1315   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1316   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1317   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1318 
1319   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1320     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1321     for (i=0, j=0; i<n_R; i++) {
1322       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1323         aux_array1[j++] = i;
1324       }
1325     }
1326     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1327     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1328     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1329   }
1330   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1331   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1338 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1339 {
1340   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1341   PC_IS          *pcis = (PC_IS*)pc->data;
1342   PC             pc_temp;
1343   Mat            A_RR;
1344   MatReuse       reuse;
1345   PetscScalar    m_one = -1.0;
1346   PetscReal      value;
1347   PetscInt       n_D,n_R,ibs,mbs;
1348   PetscBool      use_exact,use_exact_reduced,issbaij;
1349   PetscErrorCode ierr;
1350   /* prefixes stuff */
1351   char           dir_prefix[256],neu_prefix[256],str_level[16];
1352   size_t         len;
1353 
1354   PetscFunctionBegin;
1355 
1356   /* compute prefixes */
1357   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1358   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1359   if (!pcbddc->current_level) {
1360     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1361     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1362     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1363     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1364   } else {
1365     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1366     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1367     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1368     len -= 15; /* remove "pc_bddc_coarse_" */
1369     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1370     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1371     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1372     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1373     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1374     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1375     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1376     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1377   }
1378 
1379   /* DIRICHLET PROBLEM */
1380   if (dirichlet) {
1381     /* Matrix for Dirichlet problem is pcis->A_II */
1382     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1383     if (!pcbddc->ksp_D) { /* create object if not yet build */
1384       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1385       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1386       /* default */
1387       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1388       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1389       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1390       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1391       if (issbaij) {
1392         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1393       } else {
1394         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1395       }
1396       /* Allow user's customization */
1397       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1398       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1399     }
1400     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1401     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1402     if (!n_D) {
1403       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1404       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1405     }
1406     /* Set Up KSP for Dirichlet problem of BDDC */
1407     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1408     /* set ksp_D into pcis data */
1409     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1410     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1411     pcis->ksp_D = pcbddc->ksp_D;
1412   }
1413 
1414   /* NEUMANN PROBLEM */
1415   A_RR = 0;
1416   if (neumann) {
1417     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1418     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1419     if (pcbddc->ksp_R) { /* already created ksp */
1420       PetscInt nn_R;
1421       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1422       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1423       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1424       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1425         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1426         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1427         reuse = MAT_INITIAL_MATRIX;
1428       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1429         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1430           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1431           reuse = MAT_INITIAL_MATRIX;
1432         } else { /* safe to reuse the matrix */
1433           reuse = MAT_REUSE_MATRIX;
1434         }
1435       }
1436       /* last check */
1437       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1438         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1439         reuse = MAT_INITIAL_MATRIX;
1440       }
1441     } else { /* first time, so we need to create the matrix */
1442       reuse = MAT_INITIAL_MATRIX;
1443     }
1444     /* extract A_RR */
1445     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1446     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1447     if (ibs != mbs) {
1448       Mat newmat;
1449       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1450       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1451       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1452     } else {
1453       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1454     }
1455     if (!pcbddc->ksp_R) { /* create object if not present */
1456       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1457       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1458       /* default */
1459       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1460       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1461       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1462       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1463       if (issbaij) {
1464         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1465       } else {
1466         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1467       }
1468       /* Allow user's customization */
1469       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1470       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1471     }
1472     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1473     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1474     if (!n_R) {
1475       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1476       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1477     }
1478     /* Set Up KSP for Neumann problem of BDDC */
1479     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1480   }
1481 
1482   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1483   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1484     if (pcbddc->dbg_flag) {
1485       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1486       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1487       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1488     }
1489     if (dirichlet) { /* Dirichlet */
1490       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1491       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1492       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1493       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1494       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1495       /* need to be adapted? */
1496       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1497       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1498       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1499       /* print info */
1500       if (pcbddc->dbg_flag) {
1501         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1502         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1503       }
1504       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1505         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1506       }
1507     }
1508     if (neumann) { /* Neumann */
1509       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1510       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1511       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1512       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1513       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1514       /* need to be adapted? */
1515       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1516       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1517       /* print info */
1518       if (pcbddc->dbg_flag) {
1519         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1520         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1521       }
1522       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1523         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1524       }
1525     }
1526   }
1527   /* free Neumann problem's matrix */
1528   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1534 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1535 {
1536   PetscErrorCode ierr;
1537   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1538 
1539   PetscFunctionBegin;
1540   if (applytranspose) {
1541     if (pcbddc->local_auxmat1) {
1542       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1543       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1544     }
1545     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1546   } else {
1547     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1548     if (pcbddc->local_auxmat1) {
1549       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1550       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1551     }
1552   }
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1557 #undef __FUNCT__
1558 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1559 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1560 {
1561   PetscErrorCode ierr;
1562   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1563   PC_IS*            pcis = (PC_IS*)  (pc->data);
1564   const PetscScalar zero = 0.0;
1565 
1566   PetscFunctionBegin;
1567   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1568   if (applytranspose) {
1569     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1570     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1571   } else {
1572     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1573     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1574   }
1575   /* start communications from local primal nodes to rhs of coarse solver */
1576   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1577   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1578   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579 
1580   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1581   /* TODO remove null space when doing multilevel */
1582   if (pcbddc->coarse_ksp) {
1583     if (applytranspose) {
1584       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1585     } else {
1586       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1587     }
1588   }
1589 
1590   /* Local solution on R nodes */
1591   if (pcis->n) {
1592     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1593     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1594     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1595     if (pcbddc->switch_static) {
1596       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1597       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1598     }
1599     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1600     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1601     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1602     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1603     if (pcbddc->switch_static) {
1604       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606     }
1607   }
1608 
1609   /* communications from coarse sol to local primal nodes */
1610   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1611   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1612 
1613   /* Sum contributions from two levels */
1614   if (applytranspose) {
1615     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1616     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1617   } else {
1618     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1619     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1620   }
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 #undef __FUNCT__
1625 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1626 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1627 {
1628   PetscErrorCode ierr;
1629   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1630   PetscScalar    *array;
1631   Vec            from,to;
1632 
1633   PetscFunctionBegin;
1634   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1635     from = pcbddc->coarse_vec;
1636     to = pcbddc->vec1_P;
1637     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1638       Vec tvec;
1639 
1640       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1641       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1642       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1643       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1644       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1645       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1646     }
1647   } else { /* from local to global -> put data in coarse right hand side */
1648     from = pcbddc->vec1_P;
1649     to = pcbddc->coarse_vec;
1650   }
1651   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1652   PetscFunctionReturn(0);
1653 }
1654 
1655 #undef __FUNCT__
1656 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1657 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1658 {
1659   PetscErrorCode ierr;
1660   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1661   PetscScalar    *array;
1662   Vec            from,to;
1663 
1664   PetscFunctionBegin;
1665   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1666     from = pcbddc->coarse_vec;
1667     to = pcbddc->vec1_P;
1668   } else { /* from local to global -> put data in coarse right hand side */
1669     from = pcbddc->vec1_P;
1670     to = pcbddc->coarse_vec;
1671   }
1672   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1673   if (smode == SCATTER_FORWARD) {
1674     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1675       Vec tvec;
1676 
1677       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1678       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1679       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1680       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1681     }
1682   } else {
1683     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1684      ierr = VecResetArray(from);CHKERRQ(ierr);
1685     }
1686   }
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 /* uncomment for testing purposes */
1691 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1692 #undef __FUNCT__
1693 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1694 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1695 {
1696   PetscErrorCode    ierr;
1697   PC_IS*            pcis = (PC_IS*)(pc->data);
1698   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1699   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1700   /* one and zero */
1701   PetscScalar       one=1.0,zero=0.0;
1702   /* space to store constraints and their local indices */
1703   PetscScalar       *temp_quadrature_constraint;
1704   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1705   /* iterators */
1706   PetscInt          i,j,k,total_counts,temp_start_ptr;
1707   /* stuff to store connected components stored in pcbddc->mat_graph */
1708   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1709   PetscInt          n_ISForFaces,n_ISForEdges;
1710   /* near null space stuff */
1711   MatNullSpace      nearnullsp;
1712   const Vec         *nearnullvecs;
1713   Vec               *localnearnullsp;
1714   PetscBool         nnsp_has_cnst;
1715   PetscInt          nnsp_size;
1716   PetscScalar       *array;
1717   /* BLAS integers */
1718   PetscBLASInt      lwork,lierr;
1719   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1720   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1721   /* LAPACK working arrays for SVD or POD */
1722   PetscBool         skip_lapack;
1723   PetscScalar       *work;
1724   PetscReal         *singular_vals;
1725 #if defined(PETSC_USE_COMPLEX)
1726   PetscReal         *rwork;
1727 #endif
1728 #if defined(PETSC_MISSING_LAPACK_GESVD)
1729   PetscBLASInt      Blas_one_2=1;
1730   PetscScalar       *temp_basis,*correlation_mat;
1731 #else
1732   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1733   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1734 #endif
1735   /* reuse */
1736   PetscInt          olocal_primal_size;
1737   PetscInt          *oprimal_indices_local_idxs;
1738   /* change of basis */
1739   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1740   PetscBool         boolforchange,qr_needed;
1741   PetscBT           touched,change_basis,qr_needed_idx;
1742   /* auxiliary stuff */
1743   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1744   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1745   PetscScalar       *temp_quadrature_constraint_work=NULL;
1746   /* some quantities */
1747   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1748   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1749 
1750 
1751   PetscFunctionBegin;
1752   /* Destroy Mat objects computed previously */
1753   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1754   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1755   /* TODO synch with adaptive selection */
1756   /* Get index sets for faces, edges and vertices from graph */
1757   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1758   /* free unneeded index sets */
1759   if (!pcbddc->use_vertices) {
1760     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1761   }
1762   if (!pcbddc->use_edges) {
1763     for (i=0;i<n_ISForEdges;i++) {
1764       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1765     }
1766     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1767     n_ISForEdges = 0;
1768   }
1769   if (!pcbddc->use_faces) {
1770     for (i=0;i<n_ISForFaces;i++) {
1771       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1772     }
1773     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1774     n_ISForFaces = 0;
1775   }
1776   /* HACKS (the following two blocks of code) */
1777   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1778     pcbddc->use_change_of_basis = PETSC_TRUE;
1779     if (!ISForEdges) {
1780       pcbddc->use_change_on_faces = PETSC_TRUE;
1781     }
1782   }
1783   if (pcbddc->NullSpace) {
1784     /* use_change_of_basis should be consistent among processors */
1785     PetscBool tbool[2],gbool[2];
1786     tbool [0] = pcbddc->use_change_of_basis;
1787     tbool [1] = pcbddc->use_change_on_faces;
1788     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1789     pcbddc->use_change_of_basis = gbool[0];
1790     pcbddc->use_change_on_faces = gbool[1];
1791   }
1792   /* print some info */
1793   if (pcbddc->dbg_flag) {
1794     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1795     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1796     i = 0;
1797     if (ISForVertices) {
1798       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1799     }
1800     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1801     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1802     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1803     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1804   }
1805 
1806   if (!pcbddc->adaptive_selection) {
1807     /* check if near null space is attached to global mat */
1808     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1809     if (nearnullsp) {
1810       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1811       /* remove any stored info */
1812       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1813       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1814       /* store information for BDDC solver reuse */
1815       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1816       pcbddc->onearnullspace = nearnullsp;
1817       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1818       for (i=0;i<nnsp_size;i++) {
1819         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1820       }
1821     } else { /* if near null space is not provided BDDC uses constants by default */
1822       nnsp_size = 0;
1823       nnsp_has_cnst = PETSC_TRUE;
1824     }
1825     /* get max number of constraints on a single cc */
1826     max_constraints = nnsp_size;
1827     if (nnsp_has_cnst) max_constraints++;
1828 
1829     /*
1830          Evaluate maximum storage size needed by the procedure
1831          - temp_indices will contain start index of each constraint stored as follows
1832          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1833          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1834          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1835                                                                                                                                                            */
1836     total_counts = n_ISForFaces+n_ISForEdges;
1837     total_counts *= max_constraints;
1838     n_vertices = 0;
1839     if (ISForVertices) {
1840       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1841     }
1842     total_counts += n_vertices;
1843     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1844     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1845     total_counts = 0;
1846     max_size_of_constraint = 0;
1847     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1848       if (i<n_ISForEdges) {
1849         used_IS = &ISForEdges[i];
1850       } else {
1851         used_IS = &ISForFaces[i-n_ISForEdges];
1852       }
1853       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1854       total_counts += j;
1855       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1856     }
1857     total_counts *= max_constraints;
1858     total_counts += n_vertices;
1859     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1860     /* get local part of global near null space vectors */
1861     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1862     for (k=0;k<nnsp_size;k++) {
1863       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1864       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1865       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1866     }
1867 
1868     /* whether or not to skip lapack calls */
1869     skip_lapack = PETSC_TRUE;
1870     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1871 
1872     /* allocate some auxiliary stuff */
1873     if (!skip_lapack || pcbddc->use_qr_single) {
1874       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1875     }
1876 
1877     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1878     if (!skip_lapack) {
1879       PetscScalar temp_work;
1880 
1881 #if defined(PETSC_MISSING_LAPACK_GESVD)
1882       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1883       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1884       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1885       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1886 #if defined(PETSC_USE_COMPLEX)
1887       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1888 #endif
1889       /* now we evaluate the optimal workspace using query with lwork=-1 */
1890       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1891       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1892       lwork = -1;
1893       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1894 #if !defined(PETSC_USE_COMPLEX)
1895       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1896 #else
1897       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1898 #endif
1899       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1900       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1901 #else /* on missing GESVD */
1902       /* SVD */
1903       PetscInt max_n,min_n;
1904       max_n = max_size_of_constraint;
1905       min_n = max_constraints;
1906       if (max_size_of_constraint < max_constraints) {
1907         min_n = max_size_of_constraint;
1908         max_n = max_constraints;
1909       }
1910       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1911 #if defined(PETSC_USE_COMPLEX)
1912       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1913 #endif
1914       /* now we evaluate the optimal workspace using query with lwork=-1 */
1915       lwork = -1;
1916       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1917       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1918       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1919       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1920 #if !defined(PETSC_USE_COMPLEX)
1921       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1922 #else
1923       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1924 #endif
1925       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1926       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1927 #endif /* on missing GESVD */
1928       /* Allocate optimal workspace */
1929       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1930       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1931     }
1932     /* Now we can loop on constraining sets */
1933     total_counts = 0;
1934     temp_indices[0] = 0;
1935     /* vertices */
1936     if (ISForVertices) {
1937       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1938       if (nnsp_has_cnst) { /* consider all vertices */
1939         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1940         for (i=0;i<n_vertices;i++) {
1941           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1942           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1943           total_counts++;
1944         }
1945       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1946         PetscBool used_vertex;
1947         for (i=0;i<n_vertices;i++) {
1948           used_vertex = PETSC_FALSE;
1949           k = 0;
1950           while (!used_vertex && k<nnsp_size) {
1951             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1952             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1953               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1954               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1955               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1956               total_counts++;
1957               used_vertex = PETSC_TRUE;
1958             }
1959             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1960             k++;
1961           }
1962         }
1963       }
1964       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1965       n_vertices = total_counts;
1966     }
1967 
1968     /* edges and faces */
1969     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1970       if (ncc<n_ISForEdges) {
1971         used_IS = &ISForEdges[ncc];
1972         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1973       } else {
1974         used_IS = &ISForFaces[ncc-n_ISForEdges];
1975         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1976       }
1977       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1978       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1979       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1980       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1981       /* change of basis should not be performed on local periodic nodes */
1982       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1983       if (nnsp_has_cnst) {
1984         PetscScalar quad_value;
1985         temp_constraints++;
1986         if (!pcbddc->use_nnsp_true) {
1987           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1988         } else {
1989           quad_value = 1.0;
1990         }
1991         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1992         for (j=0;j<size_of_constraint;j++) {
1993           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1994         }
1995         /* sort by global ordering if using lapack subroutines (not needed!) */
1996         if (!skip_lapack || pcbddc->use_qr_single) {
1997           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1998           for (j=0;j<size_of_constraint;j++) {
1999             permutation[j]=j;
2000           }
2001           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2002           for (j=0;j<size_of_constraint;j++) {
2003             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2004           }
2005           for (j=0;j<size_of_constraint;j++) {
2006             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2007             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2008           }
2009           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2010           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2011         }
2012         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2013         total_counts++;
2014       }
2015       for (k=0;k<nnsp_size;k++) {
2016         PetscReal real_value;
2017         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2018         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2019         for (j=0;j<size_of_constraint;j++) {
2020           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2021         }
2022         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2023         /* check if array is null on the connected component */
2024         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2025         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2026         if (real_value > 0.0) { /* keep indices and values */
2027           /* sort by global ordering if using lapack subroutines */
2028           if (!skip_lapack || pcbddc->use_qr_single) {
2029             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2030             for (j=0;j<size_of_constraint;j++) {
2031               permutation[j]=j;
2032             }
2033             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2034             for (j=0;j<size_of_constraint;j++) {
2035               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2036               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2037             }
2038             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2039             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2040           }
2041           temp_constraints++;
2042           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2043           total_counts++;
2044         }
2045       }
2046       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2047       valid_constraints = temp_constraints;
2048       if (!pcbddc->use_nnsp_true && temp_constraints) {
2049         if (temp_constraints == 1) { /* just normalize the constraint */
2050           PetscScalar norm;
2051           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2052           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2053           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2054           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2055         } else { /* perform SVD */
2056           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2057 
2058 #if defined(PETSC_MISSING_LAPACK_GESVD)
2059           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2060              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2061              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2062                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2063                 from that computed using LAPACKgesvd
2064              -> This is due to a different computation of eigenvectors in LAPACKheev
2065              -> The quality of the POD-computed basis will be the same */
2066           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2067           /* Store upper triangular part of correlation matrix */
2068           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2069           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2070           for (j=0;j<temp_constraints;j++) {
2071             for (k=0;k<j+1;k++) {
2072               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
2073             }
2074           }
2075           /* compute eigenvalues and eigenvectors of correlation matrix */
2076           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2077           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2078 #if !defined(PETSC_USE_COMPLEX)
2079           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2080 #else
2081           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2082 #endif
2083           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2084           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2085           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2086           j = 0;
2087           while (j < temp_constraints && singular_vals[j] < tol) j++;
2088           total_counts = total_counts-j;
2089           valid_constraints = temp_constraints-j;
2090           /* scale and copy POD basis into used quadrature memory */
2091           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2092           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2093           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2094           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2095           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2096           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2097           if (j<temp_constraints) {
2098             PetscInt ii;
2099             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2100             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2101             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2102             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2103             for (k=0;k<temp_constraints-j;k++) {
2104               for (ii=0;ii<size_of_constraint;ii++) {
2105                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2106               }
2107             }
2108           }
2109 #else  /* on missing GESVD */
2110           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2111           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2112           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2113           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2114 #if !defined(PETSC_USE_COMPLEX)
2115           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
2116 #else
2117           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
2118 #endif
2119           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2120           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2121           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2122           k = temp_constraints;
2123           if (k > size_of_constraint) k = size_of_constraint;
2124           j = 0;
2125           while (j < k && singular_vals[k-j-1] < tol) j++;
2126           valid_constraints = k-j;
2127           total_counts = total_counts-temp_constraints+valid_constraints;
2128 #endif /* on missing GESVD */
2129         }
2130       }
2131       /* setting change_of_basis flag is safe now */
2132       if (boolforchange) {
2133         for (j=0;j<valid_constraints;j++) {
2134           PetscBTSet(change_basis,total_counts-j-1);
2135         }
2136       }
2137     }
2138     /* free workspace */
2139     if (!skip_lapack || pcbddc->use_qr_single) {
2140       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2141     }
2142     if (!skip_lapack) {
2143       ierr = PetscFree(work);CHKERRQ(ierr);
2144 #if defined(PETSC_USE_COMPLEX)
2145       ierr = PetscFree(rwork);CHKERRQ(ierr);
2146 #endif
2147       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2148 #if defined(PETSC_MISSING_LAPACK_GESVD)
2149       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2150       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2151 #endif
2152     }
2153     for (k=0;k<nnsp_size;k++) {
2154       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2155     }
2156     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2157   } else {
2158     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2159     PetscInt        cum = 0;
2160 
2161     total_counts = 0;
2162     n_vertices = 0;
2163     if (sub_schurs->is_Ej_com) {
2164       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2165     }
2166     max_constraints = 0;
2167     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2168       total_counts += pcbddc->adaptive_constraints_n[i];
2169       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2170     }
2171     temp_indices = pcbddc->adaptive_constraints_ptrs;
2172     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2173     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2174 
2175 #if 0
2176     printf("Found %d totals\n",total_counts);
2177     for (i=0;i<total_counts;i++) {
2178       printf("const %d, start %d",i,temp_indices[i]);
2179       printf(" end %d:\n",temp_indices[i+1]);
2180       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2181         printf("  idxs %d",temp_indices_to_constraint[j]);
2182         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2183       }
2184     }
2185     for (i=0;i<n_vertices;i++) {
2186       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2187     }
2188     for (i=0;i<sub_schurs->n_subs;i++) {
2189       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2190     }
2191 #endif
2192 
2193     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2194     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2195     /* Change of basis */
2196     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2197     if (pcbddc->use_change_of_basis) {
2198       cum = n_vertices;
2199       for (i=0;i<sub_schurs->n_subs;i++) {
2200         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2201           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2202             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2203           }
2204         }
2205         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2206       }
2207     }
2208   }
2209 
2210   /* free index sets of faces, edges and vertices */
2211   for (i=0;i<n_ISForFaces;i++) {
2212     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2213   }
2214   if (n_ISForFaces) {
2215     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2216   }
2217   for (i=0;i<n_ISForEdges;i++) {
2218     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2219   }
2220   if (n_ISForEdges) {
2221     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2222   }
2223   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2224 
2225   /* map temp_indices_to_constraint in boundary numbering */
2226   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2227   if (i != temp_indices[total_counts]) {
2228     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2229   }
2230 
2231   /* set quantities in pcbddc data structure and store previous primal size */
2232   /* n_vertices defines the number of subdomain corners in the primal space */
2233   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2234   olocal_primal_size = pcbddc->local_primal_size;
2235   pcbddc->local_primal_size = total_counts;
2236   pcbddc->n_vertices = n_vertices;
2237   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2238 
2239   /* Create constraint matrix */
2240   /* The constraint matrix is used to compute the l2g map of primal dofs */
2241   /* so we need to set it up properly either with or without change of basis */
2242   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2243   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2244   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2245   /* array to compute a local numbering of constraints : vertices first then constraints */
2246   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2247   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2248   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2249   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2250   /* auxiliary stuff for basis change */
2251   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2252   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2253 
2254   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2255   total_primal_vertices=0;
2256   for (i=0;i<pcbddc->local_primal_size;i++) {
2257     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2258     if (size_of_constraint == 1) {
2259       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2260       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2261       aux_primal_minloc[total_primal_vertices]=0;
2262       total_primal_vertices++;
2263     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2264       PetscInt min_loc,min_index;
2265       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2266       /* find first untouched local node */
2267       k = 0;
2268       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2269       min_index = global_indices[k];
2270       min_loc = k;
2271       /* search the minimum among global nodes already untouched on the cc */
2272       for (k=1;k<size_of_constraint;k++) {
2273         /* there can be more than one constraint on a single connected component */
2274         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2275           min_index = global_indices[k];
2276           min_loc = k;
2277         }
2278       }
2279       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2280       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2281       aux_primal_minloc[total_primal_vertices]=min_loc;
2282       total_primal_vertices++;
2283     }
2284   }
2285   /* determine if a QR strategy is needed for change of basis */
2286   qr_needed = PETSC_FALSE;
2287   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2288   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2289     if (PetscBTLookup(change_basis,i)) {
2290       if (!pcbddc->use_qr_single) {
2291         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2292         j = 0;
2293         for (k=0;k<size_of_constraint;k++) {
2294           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2295             j++;
2296           }
2297         }
2298         /* found more than one primal dof on the cc */
2299         if (j > 1) {
2300           PetscBTSet(qr_needed_idx,i);
2301           qr_needed = PETSC_TRUE;
2302         }
2303       } else {
2304         PetscBTSet(qr_needed_idx,i);
2305         qr_needed = PETSC_TRUE;
2306       }
2307     }
2308   }
2309   /* free workspace */
2310   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2311 
2312   /* permute indices in order to have a sorted set of vertices */
2313   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2314 
2315   /* nonzero structure of constraint matrix */
2316   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2317   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2318   j=total_primal_vertices;
2319   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2320     if (!PetscBTLookup(change_basis,i)) {
2321       nnz[j]=temp_indices[i+1]-temp_indices[i];
2322       j++;
2323     }
2324   }
2325   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2326   ierr = PetscFree(nnz);CHKERRQ(ierr);
2327   /* set values in constraint matrix */
2328   for (i=0;i<total_primal_vertices;i++) {
2329     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2330   }
2331   total_counts = total_primal_vertices;
2332   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2333     if (!PetscBTLookup(change_basis,i)) {
2334       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2335       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2336       total_counts++;
2337     }
2338   }
2339   /* assembling */
2340   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2342   /*
2343   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2344   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2345   */
2346   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2347   if (pcbddc->use_change_of_basis) {
2348     /* dual and primal dofs on a single cc */
2349     PetscInt     dual_dofs,primal_dofs;
2350     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2351     PetscInt     primal_counter;
2352     /* working stuff for GEQRF */
2353     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2354     PetscBLASInt lqr_work;
2355     /* working stuff for UNGQR */
2356     PetscScalar  *gqr_work,lgqr_work_t;
2357     PetscBLASInt lgqr_work;
2358     /* working stuff for TRTRS */
2359     PetscScalar  *trs_rhs;
2360     PetscBLASInt Blas_NRHS;
2361     /* pointers for values insertion into change of basis matrix */
2362     PetscInt     *start_rows,*start_cols;
2363     PetscScalar  *start_vals;
2364     /* working stuff for values insertion */
2365     PetscBT      is_primal;
2366     /* matrix sizes */
2367     PetscInt     global_size,local_size;
2368     /* temporary change of basis */
2369     Mat          localChangeOfBasisMatrix;
2370 
2371     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2372     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2373     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2374     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2375     /* nonzeros for local mat */
2376     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2377     for (i=0;i<pcis->n;i++) nnz[i]=1;
2378     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2379       if (PetscBTLookup(change_basis,i)) {
2380         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2381         if (PetscBTLookup(qr_needed_idx,i)) {
2382           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2383         } else {
2384           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2385           /* get local primal index on the cc */
2386           j = 0;
2387           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2388           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2389         }
2390       }
2391     }
2392     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2393     ierr = PetscFree(nnz);CHKERRQ(ierr);
2394     /* Set initial identity in the matrix */
2395     for (i=0;i<pcis->n;i++) {
2396       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2397     }
2398 
2399     if (pcbddc->dbg_flag) {
2400       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2401       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2402     }
2403 
2404 
2405     /* Now we loop on the constraints which need a change of basis */
2406     /*
2407        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2408        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2409 
2410        Basic blocks of change of basis matrix T computed by
2411 
2412           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2413 
2414             | 1        0   ...        0         s_1/S |
2415             | 0        1   ...        0         s_2/S |
2416             |              ...                        |
2417             | 0        ...            1     s_{n-1}/S |
2418             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2419 
2420             with S = \sum_{i=1}^n s_i^2
2421             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2422                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2423 
2424           - QR decomposition of constraints otherwise
2425     */
2426     if (qr_needed) {
2427       /* space to store Q */
2428       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2429       /* first we issue queries for optimal work */
2430       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2431       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2432       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2433       lqr_work = -1;
2434       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2435       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2436       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2437       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2438       lgqr_work = -1;
2439       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2440       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2441       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2442       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2443       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2444       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2445       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2446       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2447       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2448       /* array to store scaling factors for reflectors */
2449       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2450       /* array to store rhs and solution of triangular solver */
2451       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2452       /* allocating workspace for check */
2453       if (pcbddc->dbg_flag) {
2454         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2455       }
2456     }
2457     /* array to store whether a node is primal or not */
2458     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2459     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2460     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2461     if (i != total_primal_vertices) {
2462       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2463     }
2464     for (i=0;i<total_primal_vertices;i++) {
2465       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2466     }
2467     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2468 
2469     /* loop on constraints and see whether or not they need a change of basis and compute it */
2470     /* -> using implicit ordering contained in temp_indices data */
2471     total_counts = pcbddc->n_vertices;
2472     primal_counter = total_counts;
2473     while (total_counts<pcbddc->local_primal_size) {
2474       primal_dofs = 1;
2475       if (PetscBTLookup(change_basis,total_counts)) {
2476         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2477         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2478           primal_dofs++;
2479         }
2480         /* get constraint info */
2481         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2482         dual_dofs = size_of_constraint-primal_dofs;
2483 
2484         if (pcbddc->dbg_flag) {
2485           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2486         }
2487 
2488         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2489 
2490           /* copy quadrature constraints for change of basis check */
2491           if (pcbddc->dbg_flag) {
2492             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2493           }
2494           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2495           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2496 
2497           /* compute QR decomposition of constraints */
2498           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2499           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2500           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2501           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2502           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2503           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2504           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2505 
2506           /* explictly compute R^-T */
2507           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2508           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2509           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2510           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2511           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2512           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2513           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2514           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2515           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2516           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2517 
2518           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2519           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2520           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2521           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2522           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2523           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2524           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2525           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2526           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2527 
2528           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2529              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2530              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2531           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2532           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2533           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2534           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2535           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2536           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2537           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2538           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2539           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2540           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2541 
2542           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2543           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2544           /* insert cols for primal dofs */
2545           for (j=0;j<primal_dofs;j++) {
2546             start_vals = &qr_basis[j*size_of_constraint];
2547             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2548             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2549           }
2550           /* insert cols for dual dofs */
2551           for (j=0,k=0;j<dual_dofs;k++) {
2552             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2553               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2554               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2555               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2556               j++;
2557             }
2558           }
2559 
2560           /* check change of basis */
2561           if (pcbddc->dbg_flag) {
2562             PetscInt   ii,jj;
2563             PetscBool valid_qr=PETSC_TRUE;
2564             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2565             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2566             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2567             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2568             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2569             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2570             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2571             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2572             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2573             for (jj=0;jj<size_of_constraint;jj++) {
2574               for (ii=0;ii<primal_dofs;ii++) {
2575                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2576                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2577               }
2578             }
2579             if (!valid_qr) {
2580               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2581               for (jj=0;jj<size_of_constraint;jj++) {
2582                 for (ii=0;ii<primal_dofs;ii++) {
2583                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2584                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2585                   }
2586                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2587                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2588                   }
2589                 }
2590               }
2591             } else {
2592               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2593             }
2594           }
2595         } else { /* simple transformation block */
2596           PetscInt    row,col;
2597           PetscScalar val,norm;
2598 
2599           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2600           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2601           for (j=0;j<size_of_constraint;j++) {
2602             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2603             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2604             if (!PetscBTLookup(is_primal,row_B)) {
2605               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2606               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2607               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2608             } else {
2609               for (k=0;k<size_of_constraint;k++) {
2610                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2611                 if (row != col) {
2612                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2613                 } else {
2614                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2615                 }
2616                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2617               }
2618             }
2619           }
2620           if (pcbddc->dbg_flag) {
2621             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2622           }
2623         }
2624         /* increment primal counter */
2625         primal_counter += primal_dofs;
2626       } else {
2627         if (pcbddc->dbg_flag) {
2628           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2629         }
2630       }
2631       /* increment constraint counter total_counts */
2632       total_counts += primal_dofs;
2633     }
2634 
2635     /* free workspace */
2636     if (qr_needed) {
2637       if (pcbddc->dbg_flag) {
2638         ierr = PetscFree(work);CHKERRQ(ierr);
2639       }
2640       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2641       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2642       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2643       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2644       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2645     }
2646     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2647     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2648     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2649 
2650     /* assembling of global change of variable */
2651     {
2652       Mat      tmat;
2653       PetscInt bs;
2654 
2655       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2656       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2657       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2658       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2659       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2660       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2661       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2662       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2663       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2664       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2665       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2666       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2667       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2668       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2669       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2670       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2671       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2672       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2673     }
2674     /* check */
2675     if (pcbddc->dbg_flag) {
2676       PetscReal error;
2677       Vec       x,x_change;
2678 
2679       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2680       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2681       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2682       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2683       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2684       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2685       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2686       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2687       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2688       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2689       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2690       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2691       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2692       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2693       ierr = VecDestroy(&x);CHKERRQ(ierr);
2694       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2695     }
2696 
2697     /* adapt sub_schurs computed (if any) */
2698     if (pcbddc->use_deluxe_scaling) {
2699       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2700       if (sub_schurs->n_subs_par_g) {
2701         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2702       }
2703       if (sub_schurs->S_Ej_all) {
2704         Mat S_1,S_2,tmat;
2705         IS is_all_N;
2706 
2707         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2708         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2709         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2710         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2711         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2712         sub_schurs->S_Ej_all = S_1;
2713         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2714         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2715         sub_schurs->sum_S_Ej_all = S_2;
2716         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2717       }
2718     }
2719     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2720   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2721     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2722     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2723   }
2724 
2725   /* set up change of basis context */
2726   if (pcbddc->ChangeOfBasisMatrix) {
2727     PCBDDCChange_ctx change_ctx;
2728 
2729     if (!pcbddc->new_global_mat) {
2730       PetscInt global_size,local_size;
2731 
2732       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2733       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2734       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2735       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2736       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2737       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2738       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2739       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2740       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2741     } else {
2742       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2743       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2744       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2745     }
2746     if (!pcbddc->user_ChangeOfBasisMatrix) {
2747       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2748       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2749     } else {
2750       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2751       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2752     }
2753     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2754     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2755     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2756     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2757   }
2758 
2759   /* get indices in local ordering for vertices and constraints */
2760   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2761     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2762     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2763   }
2764   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2765   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2766   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2767   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2768   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2769   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2770   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2771   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2772   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2773   /* set quantities in PCBDDC data struct */
2774   pcbddc->n_actual_vertices = i;
2775   /* check if a new primal space has been introduced */
2776   pcbddc->new_primal_space_local = PETSC_TRUE;
2777   if (olocal_primal_size == pcbddc->local_primal_size) {
2778     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2779     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2780     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2781   }
2782   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2783   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2784 
2785   /* flush dbg viewer */
2786   if (pcbddc->dbg_flag) {
2787     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2788   }
2789 
2790   /* free workspace */
2791   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2792   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2793   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2794   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2795   if (!pcbddc->adaptive_selection) {
2796     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2797     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2798   } else {
2799     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2800                       pcbddc->adaptive_constraints_ptrs,
2801                       pcbddc->adaptive_constraints_idxs,
2802                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2803     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2804   }
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 #undef __FUNCT__
2809 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2810 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2811 {
2812   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2813   PC_IS       *pcis = (PC_IS*)pc->data;
2814   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2815   PetscInt    ierr,i,vertex_size;
2816   PetscViewer viewer=pcbddc->dbg_viewer;
2817 
2818   PetscFunctionBegin;
2819   /* Reset previously computed graph */
2820   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2821   /* Init local Graph struct */
2822   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2823 
2824   /* Check validity of the csr graph passed in by the user */
2825   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2826     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2827   }
2828 
2829   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2830   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2831     PetscInt *xadj,*adjncy;
2832     PetscInt nvtxs;
2833 
2834     if (pcbddc->use_local_adj) {
2835       PetscBool flg_row=PETSC_FALSE;
2836 
2837       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2838       if (flg_row) {
2839         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2840         pcbddc->computed_rowadj = PETSC_TRUE;
2841       }
2842       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2843     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2844       IS                     is_dummy;
2845       ISLocalToGlobalMapping l2gmap_dummy;
2846       PetscInt               j,sum;
2847       PetscInt               *cxadj,*cadjncy;
2848       const PetscInt         *idxs;
2849       PCBDDCGraph            graph;
2850       PetscBT                is_on_boundary;
2851 
2852       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2853       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2854       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2855       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2856       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2857       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2858       graph->xadj = xadj;
2859       graph->adjncy = adjncy;
2860       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2861       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2862 
2863       if (pcbddc->dbg_flag) {
2864         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2865         for (i=0;i<graph->ncc;i++) {
2866           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2867         }
2868         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2869       }
2870 
2871       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2872       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2873       for (i=0;i<pcis->n_B;i++) {
2874         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2875       }
2876       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2877 
2878       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2879       sum = 0;
2880       for (i=0;i<graph->ncc;i++) {
2881         PetscInt sizecc = 0;
2882         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2883           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2884             sizecc++;
2885           }
2886         }
2887         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2888           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2889             cxadj[graph->queue[j]] = sizecc;
2890           }
2891         }
2892         sum += sizecc*sizecc;
2893       }
2894       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2895       sum = 0;
2896       for (i=0;i<nvtxs;i++) {
2897         PetscInt temp = cxadj[i];
2898         cxadj[i] = sum;
2899         sum += temp;
2900       }
2901       cxadj[nvtxs] = sum;
2902       for (i=0;i<graph->ncc;i++) {
2903         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2904           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2905             PetscInt k,sizecc = 0;
2906             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2907               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2908                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2909                 sizecc++;
2910               }
2911             }
2912           }
2913         }
2914       }
2915       if (nvtxs) {
2916         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2917       } else {
2918         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2919         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2920       }
2921       graph->xadj = 0;
2922       graph->adjncy = 0;
2923       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2924       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2925     }
2926   }
2927 
2928   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2929   vertex_size = 1;
2930   if (pcbddc->user_provided_isfordofs) {
2931     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2932       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2933       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2934         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2935         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2936       }
2937       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2938       pcbddc->n_ISForDofs = 0;
2939       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2940     }
2941     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2942     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2943   } else {
2944     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2945       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2946       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2947       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2948         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2949       }
2950     }
2951   }
2952 
2953   /* Setup of Graph */
2954   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2955     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2956   }
2957   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2958     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2959   }
2960   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2961 
2962   /* Graph's connected components analysis */
2963   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2964 
2965   /* print some info to stdout */
2966   if (pcbddc->dbg_flag) {
2967     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2968   }
2969 
2970   /* mark topography has done */
2971   pcbddc->recompute_topography = PETSC_FALSE;
2972   PetscFunctionReturn(0);
2973 }
2974 
2975 #undef __FUNCT__
2976 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2977 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2978 {
2979   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2980   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2981   PetscErrorCode ierr;
2982 
2983   PetscFunctionBegin;
2984   n = 0;
2985   vertices = 0;
2986   if (pcbddc->ConstraintMatrix) {
2987     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2988     for (i=0;i<local_primal_size;i++) {
2989       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2990       if (size_of_constraint == 1) n++;
2991       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2992     }
2993     if (vertices_idx) {
2994       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2995       n = 0;
2996       for (i=0;i<local_primal_size;i++) {
2997         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2998         if (size_of_constraint == 1) {
2999           vertices[n++]=row_cmat_indices[0];
3000         }
3001         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3002       }
3003     }
3004   }
3005   *n_vertices = n;
3006   if (vertices_idx) *vertices_idx = vertices;
3007   PetscFunctionReturn(0);
3008 }
3009 
3010 #undef __FUNCT__
3011 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3012 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3013 {
3014   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3015   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3016   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3017   PetscBT        touched;
3018   PetscErrorCode ierr;
3019 
3020     /* This function assumes that the number of local constraints per connected component
3021        is not greater than the number of nodes defined for the connected component
3022        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3023   PetscFunctionBegin;
3024   n = 0;
3025   constraints_index = 0;
3026   if (pcbddc->ConstraintMatrix) {
3027     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3028     max_size_of_constraint = 0;
3029     for (i=0;i<local_primal_size;i++) {
3030       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3031       if (size_of_constraint > 1) {
3032         n++;
3033       }
3034       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3035       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3036     }
3037     if (constraints_idx) {
3038       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3039       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3040       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3041       n = 0;
3042       for (i=0;i<local_primal_size;i++) {
3043         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3044         if (size_of_constraint > 1) {
3045           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3046           /* find first untouched local node */
3047           j = 0;
3048           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3049           min_index = row_cmat_global_indices[j];
3050           min_loc = j;
3051           /* search the minimum among nodes not yet touched on the connected component
3052              since there can be more than one constraint on a single cc */
3053           for (j=1;j<size_of_constraint;j++) {
3054             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3055               min_index = row_cmat_global_indices[j];
3056               min_loc = j;
3057             }
3058           }
3059           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3060           constraints_index[n++] = row_cmat_indices[min_loc];
3061         }
3062         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3063       }
3064       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3065       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3066     }
3067   }
3068   *n_constraints = n;
3069   if (constraints_idx) *constraints_idx = constraints_index;
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 #undef __FUNCT__
3074 #define __FUNCT__ "PCBDDCSubsetNumbering"
3075 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3076 {
3077   Vec            local_vec,global_vec;
3078   IS             seqis,paris;
3079   VecScatter     scatter_ctx;
3080   PetscScalar    *array;
3081   PetscInt       *temp_global_dofs;
3082   PetscScalar    globalsum;
3083   PetscInt       i,j,s;
3084   PetscInt       nlocals,first_index,old_index,max_local;
3085   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3086   PetscMPIInt    *dof_sizes,*dof_displs;
3087   PetscBool      first_found;
3088   PetscErrorCode ierr;
3089 
3090   PetscFunctionBegin;
3091   /* mpi buffers */
3092   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3093   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3094   j = ( !rank_prec_comm ? size_prec_comm : 0);
3095   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3096   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3097   /* get maximum size of subset */
3098   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3099   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3100   max_local = 0;
3101   for (i=0;i<n_local_dofs;i++) {
3102     if (max_local < temp_global_dofs[i] ) {
3103       max_local = temp_global_dofs[i];
3104     }
3105   }
3106   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3107   max_global++;
3108   max_local = 0;
3109   for (i=0;i<n_local_dofs;i++) {
3110     if (max_local < local_dofs[i] ) {
3111       max_local = local_dofs[i];
3112     }
3113   }
3114   max_local++;
3115   /* allocate workspace */
3116   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3117   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3118   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3119   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3120   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3121   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3122   /* create scatter */
3123   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3124   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3125   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3126   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3127   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3128   /* init array */
3129   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3130   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3131   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3132   if (local_dofs_mult) {
3133     for (i=0;i<n_local_dofs;i++) {
3134       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3135     }
3136   } else {
3137     for (i=0;i<n_local_dofs;i++) {
3138       array[local_dofs[i]]=1.0;
3139     }
3140   }
3141   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3142   /* scatter into global vec and get total number of global dofs */
3143   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3144   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3145   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3146   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3147   /* Fill global_vec with cumulative function for global numbering */
3148   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3149   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3150   nlocals = 0;
3151   first_index = -1;
3152   first_found = PETSC_FALSE;
3153   for (i=0;i<s;i++) {
3154     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3155       first_found = PETSC_TRUE;
3156       first_index = i;
3157     }
3158     nlocals += (PetscInt)PetscRealPart(array[i]);
3159   }
3160   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3161   if (!rank_prec_comm) {
3162     dof_displs[0]=0;
3163     for (i=1;i<size_prec_comm;i++) {
3164       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3165     }
3166   }
3167   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3168   if (first_found) {
3169     array[first_index] += (PetscScalar)nlocals;
3170     old_index = first_index;
3171     for (i=first_index+1;i<s;i++) {
3172       if (PetscRealPart(array[i]) > 0.1) {
3173         array[i] += array[old_index];
3174         old_index = i;
3175       }
3176     }
3177   }
3178   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3179   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3180   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3181   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3182   /* get global ordering of local dofs */
3183   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3184   if (local_dofs_mult) {
3185     for (i=0;i<n_local_dofs;i++) {
3186       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3187     }
3188   } else {
3189     for (i=0;i<n_local_dofs;i++) {
3190       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3191     }
3192   }
3193   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3194   /* free workspace */
3195   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3196   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3197   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3198   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3199   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3200   /* return pointer to global ordering of local dofs */
3201   *global_numbering_subset = temp_global_dofs;
3202   PetscFunctionReturn(0);
3203 }
3204 
3205 #undef __FUNCT__
3206 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3207 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3208 {
3209   PetscInt       i,j;
3210   PetscScalar    *alphas;
3211   PetscErrorCode ierr;
3212 
3213   PetscFunctionBegin;
3214   /* this implements stabilized Gram-Schmidt */
3215   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3216   for (i=0;i<n;i++) {
3217     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3218     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3219     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3220   }
3221   ierr = PetscFree(alphas);CHKERRQ(ierr);
3222   PetscFunctionReturn(0);
3223 }
3224 
3225 #undef __FUNCT__
3226 #define __FUNCT__ "MatISGetSubassemblingPattern"
3227 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3228 {
3229   Mat             subdomain_adj;
3230   IS              new_ranks,ranks_send_to;
3231   MatPartitioning partitioner;
3232   Mat_IS          *matis;
3233   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3234   PetscInt        prank;
3235   PetscMPIInt     size,rank,color;
3236   PetscInt        *xadj,*adjncy,*oldranks;
3237   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3238   PetscInt        i,local_size,threshold=0;
3239   PetscErrorCode  ierr;
3240   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3241   PetscSubcomm    subcomm;
3242 
3243   PetscFunctionBegin;
3244   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3245   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3246   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3247 
3248   /* Get info on mapping */
3249   matis = (Mat_IS*)(mat->data);
3250   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3251   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3252 
3253   /* build local CSR graph of subdomains' connectivity */
3254   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3255   xadj[0] = 0;
3256   xadj[1] = PetscMax(n_neighs-1,0);
3257   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3258   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3259 
3260   if (threshold) {
3261     PetscInt xadj_count = 0;
3262     for (i=1;i<n_neighs;i++) {
3263       if (n_shared[i] > threshold) {
3264         adjncy[xadj_count] = neighs[i];
3265         adjncy_wgt[xadj_count] = n_shared[i];
3266         xadj_count++;
3267       }
3268     }
3269     xadj[1] = xadj_count;
3270   } else {
3271     if (xadj[1]) {
3272       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3273       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3274     }
3275   }
3276   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3277   if (use_square) {
3278     for (i=0;i<xadj[1];i++) {
3279       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3280     }
3281   }
3282   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3283 
3284   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3285 
3286   /*
3287     Restrict work on active processes only.
3288   */
3289   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3290   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3291   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3292   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3293   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3294   if (color) {
3295     ierr = PetscFree(xadj);CHKERRQ(ierr);
3296     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3297     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3298   } else {
3299     PetscInt coarsening_ratio;
3300     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3301     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3302     prank = rank;
3303     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3304     /*
3305     for (i=0;i<size;i++) {
3306       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3307     }
3308     */
3309     for (i=0;i<xadj[1];i++) {
3310       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3311     }
3312     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3313     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3314     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3315 
3316     /* Partition */
3317     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3318     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3319     if (use_vwgt) {
3320       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3321       v_wgt[0] = local_size;
3322       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3323     }
3324     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3325     coarsening_ratio = size/n_subdomains;
3326     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3327     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3328     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3329     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3330 
3331     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3332     if (contiguous) {
3333       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3334     } else {
3335       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3336     }
3337     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3338     /* clean up */
3339     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3340     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3341     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3342     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3343   }
3344   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3345 
3346   /* assemble parallel IS for sends */
3347   i = 1;
3348   if (color) i=0;
3349   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3350 
3351   /* get back IS */
3352   *is_sends = ranks_send_to;
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3357 
3358 #undef __FUNCT__
3359 #define __FUNCT__ "MatISSubassemble"
3360 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3361 {
3362   Mat                    local_mat;
3363   Mat_IS                 *matis;
3364   IS                     is_sends_internal;
3365   PetscInt               rows,cols,new_local_rows;
3366   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3367   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3368   ISLocalToGlobalMapping l2gmap;
3369   PetscInt*              l2gmap_indices;
3370   const PetscInt*        is_indices;
3371   MatType                new_local_type;
3372   /* buffers */
3373   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3374   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3375   PetscInt               *recv_buffer_idxs_local;
3376   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3377   /* MPI */
3378   MPI_Comm               comm,comm_n;
3379   PetscSubcomm           subcomm;
3380   PetscMPIInt            n_sends,n_recvs,commsize;
3381   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3382   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3383   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3384   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3385   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3386   PetscErrorCode         ierr;
3387 
3388   PetscFunctionBegin;
3389   /* TODO: add missing checks */
3390   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3391   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3392   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3393   PetscValidLogicalCollectiveInt(mat,nis,7);
3394   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3395   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3396   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3397   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3398   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3399   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3400   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3401   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3402     PetscInt mrows,mcols,mnrows,mncols;
3403     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3404     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3405     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3406     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3407     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3408     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3409   }
3410   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3411   PetscValidLogicalCollectiveInt(mat,bs,0);
3412   /* prepare IS for sending if not provided */
3413   if (!is_sends) {
3414     PetscBool pcontig = PETSC_TRUE;
3415     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3416     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3417   } else {
3418     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3419     is_sends_internal = is_sends;
3420   }
3421 
3422   /* get pointer of MATIS data */
3423   matis = (Mat_IS*)mat->data;
3424 
3425   /* get comm */
3426   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3427 
3428   /* compute number of sends */
3429   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3430   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3431 
3432   /* compute number of receives */
3433   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3434   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3435   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3436   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3437   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3438   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3439   ierr = PetscFree(iflags);CHKERRQ(ierr);
3440 
3441   /* restrict comm if requested */
3442   subcomm = 0;
3443   destroy_mat = PETSC_FALSE;
3444   if (restrict_comm) {
3445     PetscMPIInt color,subcommsize;
3446 
3447     color = 0;
3448     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3449     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3450     subcommsize = commsize - subcommsize;
3451     /* check if reuse has been requested */
3452     if (reuse == MAT_REUSE_MATRIX) {
3453       if (*mat_n) {
3454         PetscMPIInt subcommsize2;
3455         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3456         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3457         comm_n = PetscObjectComm((PetscObject)*mat_n);
3458       } else {
3459         comm_n = PETSC_COMM_SELF;
3460       }
3461     } else { /* MAT_INITIAL_MATRIX */
3462       PetscMPIInt rank;
3463 
3464       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3465       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3466       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3467       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3468       comm_n = subcomm->comm;
3469     }
3470     /* flag to destroy *mat_n if not significative */
3471     if (color) destroy_mat = PETSC_TRUE;
3472   } else {
3473     comm_n = comm;
3474   }
3475 
3476   /* prepare send/receive buffers */
3477   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3478   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3479   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3480   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3481   if (nis) {
3482     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3483   }
3484 
3485   /* Get data from local matrices */
3486   if (!isdense) {
3487     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3488     /* TODO: See below some guidelines on how to prepare the local buffers */
3489     /*
3490        send_buffer_vals should contain the raw values of the local matrix
3491        send_buffer_idxs should contain:
3492        - MatType_PRIVATE type
3493        - PetscInt        size_of_l2gmap
3494        - PetscInt        global_row_indices[size_of_l2gmap]
3495        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3496     */
3497   } else {
3498     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3499     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3500     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3501     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3502     send_buffer_idxs[1] = i;
3503     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3504     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3505     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3506     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3507     for (i=0;i<n_sends;i++) {
3508       ilengths_vals[is_indices[i]] = len*len;
3509       ilengths_idxs[is_indices[i]] = len+2;
3510     }
3511   }
3512   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3513   /* additional is (if any) */
3514   if (nis) {
3515     PetscMPIInt psum;
3516     PetscInt j;
3517     for (j=0,psum=0;j<nis;j++) {
3518       PetscInt plen;
3519       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3520       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3521       psum += len+1; /* indices + lenght */
3522     }
3523     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3524     for (j=0,psum=0;j<nis;j++) {
3525       PetscInt plen;
3526       const PetscInt *is_array_idxs;
3527       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3528       send_buffer_idxs_is[psum] = plen;
3529       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3530       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3531       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3532       psum += plen+1; /* indices + lenght */
3533     }
3534     for (i=0;i<n_sends;i++) {
3535       ilengths_idxs_is[is_indices[i]] = psum;
3536     }
3537     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3538   }
3539 
3540   buf_size_idxs = 0;
3541   buf_size_vals = 0;
3542   buf_size_idxs_is = 0;
3543   for (i=0;i<n_recvs;i++) {
3544     buf_size_idxs += (PetscInt)olengths_idxs[i];
3545     buf_size_vals += (PetscInt)olengths_vals[i];
3546     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3547   }
3548   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3549   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3550   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3551 
3552   /* get new tags for clean communications */
3553   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3554   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3555   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3556 
3557   /* allocate for requests */
3558   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3559   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3560   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3561   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3562   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3563   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3564 
3565   /* communications */
3566   ptr_idxs = recv_buffer_idxs;
3567   ptr_vals = recv_buffer_vals;
3568   ptr_idxs_is = recv_buffer_idxs_is;
3569   for (i=0;i<n_recvs;i++) {
3570     source_dest = onodes[i];
3571     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3572     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3573     ptr_idxs += olengths_idxs[i];
3574     ptr_vals += olengths_vals[i];
3575     if (nis) {
3576       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3577       ptr_idxs_is += olengths_idxs_is[i];
3578     }
3579   }
3580   for (i=0;i<n_sends;i++) {
3581     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3582     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3583     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3584     if (nis) {
3585       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3586     }
3587   }
3588   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3589   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3590 
3591   /* assemble new l2g map */
3592   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3593   ptr_idxs = recv_buffer_idxs;
3594   new_local_rows = 0;
3595   for (i=0;i<n_recvs;i++) {
3596     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3597     ptr_idxs += olengths_idxs[i];
3598   }
3599   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3600   ptr_idxs = recv_buffer_idxs;
3601   new_local_rows = 0;
3602   for (i=0;i<n_recvs;i++) {
3603     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3604     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3605     ptr_idxs += olengths_idxs[i];
3606   }
3607   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3608   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3609   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3610 
3611   /* infer new local matrix type from received local matrices type */
3612   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3613   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3614   if (n_recvs) {
3615     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3616     ptr_idxs = recv_buffer_idxs;
3617     for (i=0;i<n_recvs;i++) {
3618       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3619         new_local_type_private = MATAIJ_PRIVATE;
3620         break;
3621       }
3622       ptr_idxs += olengths_idxs[i];
3623     }
3624     switch (new_local_type_private) {
3625       case MATDENSE_PRIVATE:
3626         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3627           new_local_type = MATSEQAIJ;
3628           bs = 1;
3629         } else { /* if I receive only 1 dense matrix */
3630           new_local_type = MATSEQDENSE;
3631           bs = 1;
3632         }
3633         break;
3634       case MATAIJ_PRIVATE:
3635         new_local_type = MATSEQAIJ;
3636         bs = 1;
3637         break;
3638       case MATBAIJ_PRIVATE:
3639         new_local_type = MATSEQBAIJ;
3640         break;
3641       case MATSBAIJ_PRIVATE:
3642         new_local_type = MATSEQSBAIJ;
3643         break;
3644       default:
3645         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3646         break;
3647     }
3648   } else { /* by default, new_local_type is seqdense */
3649     new_local_type = MATSEQDENSE;
3650     bs = 1;
3651   }
3652 
3653   /* create MATIS object if needed */
3654   if (reuse == MAT_INITIAL_MATRIX) {
3655     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3656     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3657   } else {
3658     /* it also destroys the local matrices */
3659     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3660   }
3661   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3662   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3663 
3664   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3665 
3666   /* Global to local map of received indices */
3667   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3668   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3669   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3670 
3671   /* restore attributes -> type of incoming data and its size */
3672   buf_size_idxs = 0;
3673   for (i=0;i<n_recvs;i++) {
3674     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3675     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3676     buf_size_idxs += (PetscInt)olengths_idxs[i];
3677   }
3678   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3679 
3680   /* set preallocation */
3681   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3682   if (!newisdense) {
3683     PetscInt *new_local_nnz=0;
3684 
3685     ptr_vals = recv_buffer_vals;
3686     ptr_idxs = recv_buffer_idxs_local;
3687     if (n_recvs) {
3688       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3689     }
3690     for (i=0;i<n_recvs;i++) {
3691       PetscInt j;
3692       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3693         for (j=0;j<*(ptr_idxs+1);j++) {
3694           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3695         }
3696       } else {
3697         /* TODO */
3698       }
3699       ptr_idxs += olengths_idxs[i];
3700     }
3701     if (new_local_nnz) {
3702       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3703       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3704       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3705       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3706       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3707       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3708     } else {
3709       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3710     }
3711     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3712   } else {
3713     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3714   }
3715 
3716   /* set values */
3717   ptr_vals = recv_buffer_vals;
3718   ptr_idxs = recv_buffer_idxs_local;
3719   for (i=0;i<n_recvs;i++) {
3720     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3721       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3722       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3723       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3724       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3725       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3726     } else {
3727       /* TODO */
3728     }
3729     ptr_idxs += olengths_idxs[i];
3730     ptr_vals += olengths_vals[i];
3731   }
3732   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3733   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3734   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3735   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3736   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3737   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3738 
3739 #if 0
3740   if (!restrict_comm) { /* check */
3741     Vec       lvec,rvec;
3742     PetscReal infty_error;
3743 
3744     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3745     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3746     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3747     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3748     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3749     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3750     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3751     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3752     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3753   }
3754 #endif
3755 
3756   /* assemble new additional is (if any) */
3757   if (nis) {
3758     PetscInt **temp_idxs,*count_is,j,psum;
3759 
3760     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3761     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3762     ptr_idxs = recv_buffer_idxs_is;
3763     psum = 0;
3764     for (i=0;i<n_recvs;i++) {
3765       for (j=0;j<nis;j++) {
3766         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3767         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3768         psum += plen;
3769         ptr_idxs += plen+1; /* shift pointer to received data */
3770       }
3771     }
3772     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3773     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3774     for (i=1;i<nis;i++) {
3775       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3776     }
3777     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3778     ptr_idxs = recv_buffer_idxs_is;
3779     for (i=0;i<n_recvs;i++) {
3780       for (j=0;j<nis;j++) {
3781         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3782         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3783         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3784         ptr_idxs += plen+1; /* shift pointer to received data */
3785       }
3786     }
3787     for (i=0;i<nis;i++) {
3788       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3789       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3790       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3791     }
3792     ierr = PetscFree(count_is);CHKERRQ(ierr);
3793     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3794     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3795   }
3796   /* free workspace */
3797   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3798   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3799   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3800   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3801   if (isdense) {
3802     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3803     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3804   } else {
3805     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3806   }
3807   if (nis) {
3808     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3809     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3810   }
3811   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3812   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3813   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3814   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3815   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3816   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3817   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3818   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3819   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3820   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3821   ierr = PetscFree(onodes);CHKERRQ(ierr);
3822   if (nis) {
3823     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3824     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3825     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3826   }
3827   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3828   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3829     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3830     for (i=0;i<nis;i++) {
3831       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3832     }
3833   }
3834   PetscFunctionReturn(0);
3835 }
3836 
3837 /* temporary hack into ksp private data structure */
3838 #include <petsc-private/kspimpl.h>
3839 
3840 #undef __FUNCT__
3841 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3842 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3843 {
3844   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3845   PC_IS                  *pcis = (PC_IS*)pc->data;
3846   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3847   MatNullSpace           CoarseNullSpace=NULL;
3848   ISLocalToGlobalMapping coarse_islg;
3849   IS                     coarse_is,*isarray;
3850   PetscInt               i,im_active=-1,active_procs=-1;
3851   PetscInt               nis,nisdofs,nisneu;
3852   PC                     pc_temp;
3853   PCType                 coarse_pc_type;
3854   KSPType                coarse_ksp_type;
3855   PetscBool              multilevel_requested,multilevel_allowed;
3856   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3857   Mat                    t_coarse_mat_is;
3858   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3859   PetscMPIInt            all_procs;
3860   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3861   PetscBool              compute_vecs = PETSC_FALSE;
3862   PetscScalar            *array;
3863   PetscErrorCode         ierr;
3864 
3865   PetscFunctionBegin;
3866   /* Assign global numbering to coarse dofs */
3867   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3868     compute_vecs = PETSC_TRUE;
3869     PetscInt ocoarse_size;
3870     ocoarse_size = pcbddc->coarse_size;
3871     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3872     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3873     /* see if we can avoid some work */
3874     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3875       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3876         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3877         coarse_reuse = PETSC_FALSE;
3878       } else { /* we can safely reuse already computed coarse matrix */
3879         coarse_reuse = PETSC_TRUE;
3880       }
3881     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3882       coarse_reuse = PETSC_FALSE;
3883     }
3884     /* reset any subassembling information */
3885     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3886     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3887   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3888     coarse_reuse = PETSC_TRUE;
3889   }
3890 
3891   /* count "active" (i.e. with positive local size) and "void" processes */
3892   im_active = !!(pcis->n);
3893   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3894   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3895   void_procs = all_procs-active_procs;
3896   csin_type_simple = PETSC_TRUE;
3897   redist = PETSC_FALSE;
3898   if (pcbddc->current_level && void_procs) {
3899     csin_ml = PETSC_TRUE;
3900     ncoarse_ml = void_procs;
3901     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3902     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3903       csin_ds = PETSC_TRUE;
3904       ncoarse_ds = pcbddc->redistribute_coarse;
3905       redist = PETSC_TRUE;
3906     } else {
3907       csin_ds = PETSC_TRUE;
3908       ncoarse_ds = active_procs;
3909       redist = PETSC_TRUE;
3910     }
3911   } else {
3912     csin_ml = PETSC_FALSE;
3913     ncoarse_ml = all_procs;
3914     if (void_procs) {
3915       csin_ds = PETSC_TRUE;
3916       ncoarse_ds = void_procs;
3917       csin_type_simple = PETSC_FALSE;
3918     } else {
3919       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3920         csin_ds = PETSC_TRUE;
3921         ncoarse_ds = pcbddc->redistribute_coarse;
3922         redist = PETSC_TRUE;
3923       } else {
3924         csin_ds = PETSC_FALSE;
3925         ncoarse_ds = all_procs;
3926       }
3927     }
3928   }
3929 
3930   /*
3931     test if we can go multilevel: three conditions must be satisfied:
3932     - we have not exceeded the number of levels requested
3933     - we can actually subassemble the active processes
3934     - we can find a suitable number of MPI processes where we can place the subassembled problem
3935   */
3936   multilevel_allowed = PETSC_FALSE;
3937   multilevel_requested = PETSC_FALSE;
3938   if (pcbddc->current_level < pcbddc->max_levels) {
3939     multilevel_requested = PETSC_TRUE;
3940     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3941       multilevel_allowed = PETSC_FALSE;
3942     } else {
3943       multilevel_allowed = PETSC_TRUE;
3944     }
3945   }
3946   /* determine number of process partecipating to coarse solver */
3947   if (multilevel_allowed) {
3948     ncoarse = ncoarse_ml;
3949     csin = csin_ml;
3950     redist = PETSC_FALSE;
3951   } else {
3952     ncoarse = ncoarse_ds;
3953     csin = csin_ds;
3954   }
3955 
3956   /* creates temporary l2gmap and IS for coarse indexes */
3957   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3958   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3959 
3960   /* creates temporary MATIS object for coarse matrix */
3961   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3962   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3963   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3964   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3965 #if 0
3966   {
3967     PetscViewer viewer;
3968     char filename[256];
3969     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3970     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3971     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3972     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3973     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3974   }
3975 #endif
3976   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3977   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3978   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3979   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3980   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3981 
3982   /* compute dofs splitting and neumann boundaries for coarse dofs */
3983   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3984     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3985     const PetscInt         *idxs;
3986     ISLocalToGlobalMapping tmap;
3987 
3988     /* create map between primal indices (in local representative ordering) and local primal numbering */
3989     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3990     /* allocate space for temporary storage */
3991     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3992     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3993     /* allocate for IS array */
3994     nisdofs = pcbddc->n_ISForDofsLocal;
3995     nisneu = !!pcbddc->NeumannBoundariesLocal;
3996     nis = nisdofs + nisneu;
3997     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3998     /* dofs splitting */
3999     for (i=0;i<nisdofs;i++) {
4000       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4001       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4002       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4003       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4004       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4005       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4006       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4007       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4008     }
4009     /* neumann boundaries */
4010     if (pcbddc->NeumannBoundariesLocal) {
4011       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4012       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4013       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4014       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4015       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4016       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4017       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4018       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4019     }
4020     /* free memory */
4021     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4022     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4023     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4024   } else {
4025     nis = 0;
4026     nisdofs = 0;
4027     nisneu = 0;
4028     isarray = NULL;
4029   }
4030   /* destroy no longer needed map */
4031   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4032 
4033   /* restrict on coarse candidates (if needed) */
4034   coarse_mat_is = NULL;
4035   if (csin) {
4036     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4037       if (redist) {
4038         PetscMPIInt rank;
4039         PetscInt    spc,n_spc_p1,dest[1],destsize;
4040 
4041         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4042         spc = active_procs/ncoarse;
4043         n_spc_p1 = active_procs%ncoarse;
4044         if (im_active) {
4045           destsize = 1;
4046           if (rank > n_spc_p1*(spc+1)-1) {
4047             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4048           } else {
4049             dest[0] = rank/(spc+1);
4050           }
4051         } else {
4052           destsize = 0;
4053         }
4054         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4055       } else if (csin_type_simple) {
4056         PetscMPIInt rank;
4057         PetscInt    issize,isidx;
4058 
4059         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4060         if (im_active) {
4061           issize = 1;
4062           isidx = (PetscInt)rank;
4063         } else {
4064           issize = 0;
4065           isidx = -1;
4066         }
4067         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4068       } else { /* get a suitable subassembling pattern from MATIS code */
4069         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4070       }
4071 
4072       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4073       if (!redist || ncoarse <= void_procs) {
4074         PetscInt ncoarse_cand,tissize,*nisindices;
4075         PetscInt *coarse_candidates;
4076         const PetscInt* tisindices;
4077 
4078         /* get coarse candidates' ranks in pc communicator */
4079         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4080         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4081         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4082           if (!coarse_candidates[i]) {
4083             coarse_candidates[ncoarse_cand++]=i;
4084           }
4085         }
4086         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4087 
4088 
4089         if (pcbddc->dbg_flag) {
4090           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4091           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4092           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4093           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4094           for (i=0;i<ncoarse_cand;i++) {
4095             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4096           }
4097           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4098           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4099         }
4100         /* shift the pattern on coarse candidates */
4101         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4102         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4103         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4104         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4105         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4106         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4107         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4108       }
4109       if (pcbddc->dbg_flag) {
4110         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4111         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4112         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4113         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4114       }
4115     }
4116     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4117     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4118   } else {
4119     if (pcbddc->dbg_flag) {
4120       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4121       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4122       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4123     }
4124     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4125     coarse_mat_is = t_coarse_mat_is;
4126   }
4127 
4128   /* create local to global scatters for coarse problem */
4129   if (compute_vecs) {
4130     PetscInt lrows;
4131     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4132     if (coarse_mat_is) {
4133       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4134     } else {
4135       lrows = 0;
4136     }
4137     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4138     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4139     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4140     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4141     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4142   }
4143   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4144   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4145 
4146   /* set defaults for coarse KSP and PC */
4147   if (multilevel_allowed) {
4148     coarse_ksp_type = KSPRICHARDSON;
4149     coarse_pc_type = PCBDDC;
4150   } else {
4151     coarse_ksp_type = KSPPREONLY;
4152     coarse_pc_type = PCREDUNDANT;
4153   }
4154 
4155   /* print some info if requested */
4156   if (pcbddc->dbg_flag) {
4157     if (!multilevel_allowed) {
4158       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4159       if (multilevel_requested) {
4160         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4161       } else if (pcbddc->max_levels) {
4162         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4163       }
4164       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4165     }
4166   }
4167 
4168   /* create the coarse KSP object only once with defaults */
4169   if (coarse_mat_is) {
4170     MatReuse coarse_mat_reuse;
4171     PetscViewer dbg_viewer = NULL;
4172     if (pcbddc->dbg_flag) {
4173       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4174       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4175     }
4176     if (!pcbddc->coarse_ksp) {
4177       char prefix[256],str_level[16];
4178       size_t len;
4179       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4180       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4181       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4182       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4183       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4184       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4185       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4186       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4187       /* prefix */
4188       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4189       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4190       if (!pcbddc->current_level) {
4191         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4192         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4193       } else {
4194         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4195         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4196         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4197         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4198         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4199         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4200       }
4201       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4202       /* allow user customization */
4203       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4204       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4205     }
4206 
4207     /* get some info after set from options */
4208     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4209     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4210     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4211     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4212     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4213       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4214       isbddc = PETSC_FALSE;
4215     }
4216     if (isredundant) {
4217       KSP inner_ksp;
4218       PC inner_pc;
4219       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4220       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4221       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4222     }
4223 
4224     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4225     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4226     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4227     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4228     if (nisdofs) {
4229       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4230       for (i=0;i<nisdofs;i++) {
4231         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4232       }
4233     }
4234     if (nisneu) {
4235       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4236       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4237     }
4238 
4239     /* assemble coarse matrix */
4240     if (coarse_reuse) {
4241       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4242       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4243       coarse_mat_reuse = MAT_REUSE_MATRIX;
4244     } else {
4245       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4246     }
4247     if (isbddc || isnn) {
4248       if (pcbddc->coarsening_ratio > 1) {
4249         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4250           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4251           if (pcbddc->dbg_flag) {
4252             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4253             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4254             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4255             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4256           }
4257         }
4258         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4259       } else {
4260         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4261         coarse_mat = coarse_mat_is;
4262       }
4263     } else {
4264       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4265     }
4266     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4267 
4268     /* propagate symmetry info to coarse matrix */
4269     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4270     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4271 
4272     /* set operators */
4273     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4274     if (pcbddc->dbg_flag) {
4275       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4276     }
4277   } else { /* processes non partecipating to coarse solver (if any) */
4278     coarse_mat = 0;
4279   }
4280   ierr = PetscFree(isarray);CHKERRQ(ierr);
4281 #if 0
4282   {
4283     PetscViewer viewer;
4284     char filename[256];
4285     sprintf(filename,"coarse_mat.m");
4286     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4287     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4288     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4289     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4290   }
4291 #endif
4292 
4293   /* Compute coarse null space (special handling by BDDC only) */
4294   if (pcbddc->NullSpace) {
4295     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4296   }
4297 
4298   if (pcbddc->coarse_ksp) {
4299     Vec crhs,csol;
4300     PetscBool ispreonly;
4301     if (CoarseNullSpace) {
4302       if (isbddc) {
4303         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4304       } else {
4305         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4306       }
4307     }
4308     /* setup coarse ksp */
4309     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4310     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4311     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4312     /* hack */
4313     if (!csol) {
4314       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4315     }
4316     if (!crhs) {
4317       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4318     }
4319     /* Check coarse problem if in debug mode or if solving with an iterative method */
4320     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4321     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4322       KSP       check_ksp;
4323       KSPType   check_ksp_type;
4324       PC        check_pc;
4325       Vec       check_vec,coarse_vec;
4326       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4327       PetscInt  its;
4328       PetscBool compute_eigs;
4329       PetscReal *eigs_r,*eigs_c;
4330       PetscInt  neigs;
4331       const char *prefix;
4332 
4333       /* Create ksp object suitable for estimation of extreme eigenvalues */
4334       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4335       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4336       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4337       if (ispreonly) {
4338         check_ksp_type = KSPPREONLY;
4339         compute_eigs = PETSC_FALSE;
4340       } else {
4341         check_ksp_type = KSPGMRES;
4342         compute_eigs = PETSC_TRUE;
4343       }
4344       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4345       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4346       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4347       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4348       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4349       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4350       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4351       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4352       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4353       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4354       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4355       /* create random vec */
4356       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4357       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4358       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4359       if (CoarseNullSpace) {
4360         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4361       }
4362       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4363       /* solve coarse problem */
4364       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4365       if (CoarseNullSpace) {
4366         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4367       }
4368       /* set eigenvalue estimation if preonly has not been requested */
4369       if (compute_eigs) {
4370         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4371         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4372         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4373         lambda_max = eigs_r[neigs-1];
4374         lambda_min = eigs_r[0];
4375         if (pcbddc->use_coarse_estimates) {
4376           if (lambda_max>lambda_min) {
4377             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4378             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4379           }
4380         }
4381       }
4382 
4383       /* check coarse problem residual error */
4384       if (pcbddc->dbg_flag) {
4385         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4386         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4387         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4388         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4389         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4390         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4391         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4392         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4393         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4394         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4395         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4396         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4397         if (compute_eigs) {
4398           PetscReal lambda_max_s,lambda_min_s;
4399           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4400           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4401           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4402           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4403           for (i=0;i<neigs;i++) {
4404             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4405           }
4406         }
4407         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4408         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4409       }
4410       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4411       if (compute_eigs) {
4412         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4413         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4414       }
4415     }
4416   }
4417   /* print additional info */
4418   if (pcbddc->dbg_flag) {
4419     /* waits until all processes reaches this point */
4420     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4421     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4422     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4423   }
4424 
4425   /* free memory */
4426   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4427   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 #undef __FUNCT__
4432 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4433 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4434 {
4435   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4436   PC_IS*         pcis = (PC_IS*)pc->data;
4437   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4438   PetscInt       i,coarse_size;
4439   PetscInt       *local_primal_indices;
4440   PetscErrorCode ierr;
4441 
4442   PetscFunctionBegin;
4443   /* Compute global number of coarse dofs */
4444   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4445     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4446   }
4447   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4448 
4449   /* check numbering */
4450   if (pcbddc->dbg_flag) {
4451     PetscScalar coarsesum,*array;
4452     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4453 
4454     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4455     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4456     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4457     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4458     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4459     for (i=0;i<pcbddc->local_primal_size;i++) {
4460       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4461     }
4462     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4463     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4464     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4465     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4466     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4467     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4468     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4469     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4470     for (i=0;i<pcis->n;i++) {
4471       if (array[i] == 1.0) {
4472         set_error = PETSC_TRUE;
4473         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4474       }
4475     }
4476     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4477     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4478     for (i=0;i<pcis->n;i++) {
4479       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4480     }
4481     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4482     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4483     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4484     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4485     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4486     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4487     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4488       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4489       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4490       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4491       for (i=0;i<pcbddc->local_primal_size;i++) {
4492         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4493       }
4494       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4495     }
4496     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4497     if (set_error_reduced) {
4498       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4499     }
4500   }
4501   /* get back data */
4502   *coarse_size_n = coarse_size;
4503   *local_primal_indices_n = local_primal_indices;
4504   PetscFunctionReturn(0);
4505 }
4506 
4507 #undef __FUNCT__
4508 #define __FUNCT__ "PCBDDCGlobalToLocal"
4509 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4510 {
4511   IS             localis_t;
4512   PetscInt       i,lsize,*idxs,n;
4513   PetscScalar    *vals;
4514   PetscErrorCode ierr;
4515 
4516   PetscFunctionBegin;
4517   /* get indices in local ordering exploiting local to global map */
4518   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4519   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4520   for (i=0;i<lsize;i++) vals[i] = 1.0;
4521   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4522   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4523   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4524   if (idxs) { /* multilevel guard */
4525     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4526   }
4527   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4528   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4529   ierr = PetscFree(vals);CHKERRQ(ierr);
4530   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4531   /* now compute set in local ordering */
4532   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4533   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4534   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4535   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4536   for (i=0,lsize=0;i<n;i++) {
4537     if (PetscRealPart(vals[i]) > 0.5) {
4538       lsize++;
4539     }
4540   }
4541   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4542   for (i=0,lsize=0;i<n;i++) {
4543     if (PetscRealPart(vals[i]) > 0.5) {
4544       idxs[lsize++] = i;
4545     }
4546   }
4547   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4548   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4549   *localis = localis_t;
4550   PetscFunctionReturn(0);
4551 }
4552 
4553 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4554 #undef __FUNCT__
4555 #define __FUNCT__ "PCBDDCMatMult_Private"
4556 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4557 {
4558   PCBDDCChange_ctx change_ctx;
4559   PetscErrorCode   ierr;
4560 
4561   PetscFunctionBegin;
4562   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4563   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4564   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4565   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4566   PetscFunctionReturn(0);
4567 }
4568 
4569 #undef __FUNCT__
4570 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4571 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4572 {
4573   PCBDDCChange_ctx change_ctx;
4574   PetscErrorCode   ierr;
4575 
4576   PetscFunctionBegin;
4577   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4578   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4579   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4580   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4581   PetscFunctionReturn(0);
4582 }
4583 
4584 #undef __FUNCT__
4585 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4586 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4587 {
4588   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4589   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4590   PetscInt            *used_xadj,*used_adjncy;
4591   PetscBool           free_used_adj;
4592   PetscErrorCode      ierr;
4593 
4594   PetscFunctionBegin;
4595   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4596   free_used_adj = PETSC_FALSE;
4597   if (pcbddc->sub_schurs_layers == -1) {
4598     used_xadj = NULL;
4599     used_adjncy = NULL;
4600   } else {
4601     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4602       used_xadj = pcbddc->mat_graph->xadj;
4603       used_adjncy = pcbddc->mat_graph->adjncy;
4604     } else if (pcbddc->computed_rowadj) {
4605       used_xadj = pcbddc->mat_graph->xadj;
4606       used_adjncy = pcbddc->mat_graph->adjncy;
4607     } else {
4608       PetscBool      flg_row=PETSC_FALSE;
4609       const PetscInt *xadj,*adjncy;
4610       PetscInt       nvtxs;
4611 
4612       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4613       if (flg_row) {
4614         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4615         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4616         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4617         free_used_adj = PETSC_TRUE;
4618       } else {
4619         pcbddc->sub_schurs_layers = -1;
4620         used_xadj = NULL;
4621         used_adjncy = NULL;
4622       }
4623       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4624     }
4625   }
4626   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4627 
4628   /* free adjacency */
4629   if (free_used_adj) {
4630     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4631   }
4632   PetscFunctionReturn(0);
4633 }
4634 
4635 #undef __FUNCT__
4636 #define __FUNCT__ "PCBDDCInitSubSchurs"
4637 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4638 {
4639   PC_IS               *pcis=(PC_IS*)pc->data;
4640   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4641   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4642   PCBDDCGraph         graph;
4643   Mat                 S_j;
4644   PetscErrorCode      ierr;
4645 
4646   PetscFunctionBegin;
4647   /* attach interface graph for determining subsets */
4648   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4649     IS verticesIS;
4650 
4651     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4652     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4653     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4654     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4655     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4656     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4657 /*
4658     if (pcbddc->dbg_flag) {
4659       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4660     }
4661 */
4662   } else {
4663     graph = pcbddc->mat_graph;
4664   }
4665 
4666   /* Create Schur complement matrix */
4667   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4668   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4669 
4670   /* sub_schurs init */
4671   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4672   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4673   /* free graph struct */
4674   if (pcbddc->sub_schurs_rebuild) {
4675     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4676   }
4677   PetscFunctionReturn(0);
4678 }
4679