xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 4c6709b31df94c6e6cd21f7a1b246810db8030e6)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31   if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
32     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
33   }
34 
35   /* max size of subsets */
36   mss = 0;
37   for (i=0;i<sub_schurs->n_subs;i++) {
38     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
39       PetscInt subset_size;
40       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
41       mss = PetscMax(mss,subset_size);
42     }
43   }
44 
45   /* min/max and threshold */
46   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
47   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
48   nmax = PetscMax(nmin,nmax);
49   if (pcbddc->adaptive_threshold > 1.0) {
50     thresh = 1.0/pcbddc->adaptive_threshold;
51   } else {
52     thresh = 1.0;
53   }
54 
55   /* allocate lapack workspace */
56   cum = cum2 = 0;
57   maxneigs = 0;
58   for (i=0;i<sub_schurs->n_subs;i++) {
59     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
60       PetscInt n,subset_size;
61       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
62       n = PetscMin(subset_size,nmax);
63       cum += subset_size*n;
64       cum2 += n;
65       maxneigs = PetscMax(maxneigs,n);
66     }
67   }
68 
69   if (mss) {
70     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
71       PetscBLASInt B_itype = 1;
72       PetscBLASInt B_N = mss;
73       PetscReal    zero = 0.0;
74       PetscReal    eps = 0.0; /* dlamch? */
75 
76       B_lwork = -1;
77       S = NULL;
78       St = NULL;
79       eigs = NULL;
80       eigv = NULL;
81       B_iwork = NULL;
82       B_ifail = NULL;
83       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
84 #if defined(PETSC_USE_COMPLEX)
85       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
86 #else
87       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
88 #endif
89       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
90       ierr = PetscFPTrapPop();CHKERRQ(ierr);
91     } else {
92         /* TODO */
93     }
94   } else {
95     lwork = 0;
96   }
97 
98   nv = 0;
99   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
100     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
101   }
102   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
103   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
104                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
105 #if defined(PETSC_USE_COMPLEX)
106   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
107 #endif
108   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
109   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
110                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
111                       nv+cum,&pcbddc->adaptive_constraints_idxs,
112                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
113   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
114 
115   maxneigs = 0;
116   cum = cum2 = cumarray = 0;
117   if (sub_schurs->is_Ej_com) {
118     const PetscInt *idxs;
119 
120     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
121     for (cum=0;cum<nv;cum++) {
122       pcbddc->adaptive_constraints_n[cum] = 1;
123       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
124       pcbddc->adaptive_constraints_ptrs[cum] = cum;
125       pcbddc->adaptive_constraints_data[cum] = 1.0;
126     }
127     cum2 = cum;
128     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
129   }
130 
131   if (mss) { /* multilevel */
132     if (pcbddc->use_deluxe_scaling) {
133       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
134     } else {
135       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
136     }
137     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
138   }
139 
140   for (i=0;i<sub_schurs->n_subs;i++) {
141     PetscInt j,subset_size;
142 
143     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
144     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
145       const PetscInt *idxs;
146       PetscScalar    one = 1.0,scalar_zero = 0.0;
147       PetscReal      zero=0.0;
148       PetscBLASInt   B_N;
149 
150       /* S should be copied since we need it for deluxe scaling */
151       if (sub_schurs->is_hermitian) {
152         PetscInt j;
153         for (j=0;j<subset_size;j++) {
154           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
155         }
156         for (j=0;j<subset_size;j++) {
157           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
158         }
159       } else {
160         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
161         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
162       }
163       /* is always this the right matrix? */
164       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
165 
166       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
167       /* St = Starray+cumarray; */
168 
169       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
170         PetscBLASInt B_itype = 1;
171         PetscBLASInt B_IL = 1, B_IU;
172         PetscReal    eps = -1.0; /* dlamch? */
173         PetscInt     nmin_s;
174 
175         /* ask for eigenvalues lower than thresh */
176         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
177         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
178         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
179 #if defined(PETSC_USE_COMPLEX)
180         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
181 #else
182         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
183 #endif
184         ierr = PetscFPTrapPop();CHKERRQ(ierr);
185         if (B_ierr) {
186           if (B_ierr < 0 ) {
187             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
188           } else if (B_ierr <= B_N) {
189             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
190           } else {
191             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
192           }
193         }
194 
195         if (B_neigs > nmax) {
196           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax);
197           B_neigs = nmax;
198         }
199 
200         nmin_s = PetscMin(nmin,B_N);
201         if (B_neigs < nmin_s) {
202           PetscBLASInt B_neigs2;
203 
204           B_IL = B_neigs + 1;
205           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
206           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
207           if (sub_schurs->is_hermitian) {
208             PetscInt j;
209             for (j=0;j<subset_size;j++) {
210               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
211             }
212             for (j=0;j<subset_size;j++) {
213               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
214             }
215           } else {
216             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
217             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
218           }
219           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
220 #if defined(PETSC_USE_COMPLEX)
221           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
222 #else
223           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
224 #endif
225           ierr = PetscFPTrapPop();CHKERRQ(ierr);
226           B_neigs += B_neigs2;
227         }
228         if (B_ierr) {
229           if (B_ierr < 0 ) {
230             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
231           } else if (B_ierr <= B_N) {
232             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
233           } else {
234             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
235           }
236         }
237         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
238         for (j=0;j<B_neigs;j++) {
239           if (eigs[j] == 0.0) {
240             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
241           } else {
242             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
243           }
244         }
245       } else {
246           /* TODO */
247       }
248       maxneigs = PetscMax(B_neigs,maxneigs);
249       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
250 
251       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
252       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N));
253       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
254 #if 0
255       PetscInt ii;
256       for (ii=0;ii<B_neigs;ii++) {
257         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N);
258         for (j=0;j<B_N;j++) {
259           PetscPrintf(PETSC_COMM_SELF,"[%d]       %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]);
260         }
261       }
262 #endif
263       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
264       for (j=0;j<B_neigs;j++) {
265 #if 0
266         {
267           PetscBLASInt Blas_N,Blas_one = 1.0;
268           PetscScalar norm;
269           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
270           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
271           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
272             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
273           } else {
274             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
275           }
276           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
277         }
278 #endif
279         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
280         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
281         cum2 += subset_size;
282       }
283       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
284     }
285     /* shift for next computation */
286     cumarray += subset_size*subset_size;
287   }
288   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
289   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
290 
291   if (mss) {
292     if (pcbddc->use_deluxe_scaling) {
293       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
294     } else {
295       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
296     }
297     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
298   }
299   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
300 #if defined(PETSC_USE_COMPLEX)
301   ierr = PetscFree(rwork);CHKERRQ(ierr);
302 #endif
303   if (pcbddc->dbg_flag) {
304     PetscInt maxneigs_r;
305     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
306     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
307   }
308   PetscFunctionReturn(0);
309 }
310 
311 #undef __FUNCT__
312 #define __FUNCT__ "PCBDDCSetUpSolvers"
313 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
314 {
315   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
316   PetscScalar    *coarse_submat_vals;
317   PetscErrorCode ierr;
318 
319   PetscFunctionBegin;
320   /* Setup local scatters R_to_B and (optionally) R_to_D */
321   /* PCBDDCSetUpLocalWorkVectors should be called first! */
322   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
323 
324   /* Setup local neumann solver ksp_R */
325   /* PCBDDCSetUpLocalScatters should be called first! */
326   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
327 
328   /* Change global null space passed in by the user if change of basis has been requested */
329   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
330     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
331   }
332 
333   /*
334      Setup local correction and local part of coarse basis.
335      Gives back the dense local part of the coarse matrix in column major ordering
336   */
337   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
338 
339   /* Compute total number of coarse nodes and setup coarse solver */
340   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
341 
342   /* free */
343   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
344   PetscFunctionReturn(0);
345 }
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "PCBDDCResetCustomization"
349 PetscErrorCode PCBDDCResetCustomization(PC pc)
350 {
351   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
352   PetscErrorCode ierr;
353 
354   PetscFunctionBegin;
355   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
356   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
357   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
358   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
359   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
360   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
361   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
362   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
363   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
364   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
365   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
366   PetscFunctionReturn(0);
367 }
368 
369 #undef __FUNCT__
370 #define __FUNCT__ "PCBDDCResetTopography"
371 PetscErrorCode PCBDDCResetTopography(PC pc)
372 {
373   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
374   PetscErrorCode ierr;
375 
376   PetscFunctionBegin;
377   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
378   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
379   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
380   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
381   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
382   PetscFunctionReturn(0);
383 }
384 
385 #undef __FUNCT__
386 #define __FUNCT__ "PCBDDCResetSolvers"
387 PetscErrorCode PCBDDCResetSolvers(PC pc)
388 {
389   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
390   PetscScalar    *array;
391   PetscErrorCode ierr;
392 
393   PetscFunctionBegin;
394   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
395   if (pcbddc->coarse_phi_B) {
396     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
397     ierr = PetscFree(array);CHKERRQ(ierr);
398   }
399   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
400   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
403   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
404   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
405   if (pcbddc->local_auxmat2) {
406     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
407     ierr = PetscFree(array);CHKERRQ(ierr);
408   }
409   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
410   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
411   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
412   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
413   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
414   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
415   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
416   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
417   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
418   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
419   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
420   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
421   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
422   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
423   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
424   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
431 {
432   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
433   PC_IS          *pcis = (PC_IS*)pc->data;
434   VecType        impVecType;
435   PetscInt       n_constraints,n_R,old_size;
436   PetscErrorCode ierr;
437 
438   PetscFunctionBegin;
439   if (!pcbddc->ConstraintMatrix) {
440     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
441   }
442   /* get sizes */
443   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
444   n_R = pcis->n-pcbddc->n_actual_vertices;
445   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
446   /* local work vectors (try to avoid unneeded work)*/
447   /* R nodes */
448   old_size = -1;
449   if (pcbddc->vec1_R) {
450     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
451   }
452   if (n_R != old_size) {
453     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
454     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
455     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
456     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
457     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
458     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
459   }
460   /* local primal dofs */
461   old_size = -1;
462   if (pcbddc->vec1_P) {
463     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
464   }
465   if (pcbddc->local_primal_size != old_size) {
466     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
467     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
468     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
469     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
470   }
471   /* local explicit constraints */
472   old_size = -1;
473   if (pcbddc->vec1_C) {
474     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
475   }
476   if (n_constraints && n_constraints != old_size) {
477     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
481   }
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "PCBDDCSetUpCorrection"
487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
488 {
489   PetscErrorCode         ierr;
490   /* pointers to pcis and pcbddc */
491   PC_IS*                 pcis = (PC_IS*)pc->data;
492   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
493   /* submatrices of local problem */
494   Mat                    A_RV,A_VR,A_VV;
495   /* submatrices of local coarse problem */
496   Mat                    S_VV,S_CV,S_VC,S_CC;
497   /* working matrices */
498   Mat                    C_CR;
499   /* additional working stuff */
500   PC                     pc_R;
501   Mat                    F;
502   PetscBool              isLU,isCHOL,isILU;
503 
504   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
505   PetscScalar            *work;
506   PetscInt               *idx_V_B;
507   PetscInt               n,n_vertices,n_constraints;
508   PetscInt               i,n_R,n_D,n_B;
509   PetscBool              unsymmetric_check;
510   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
511   MatType                impMatType;
512   /* some shortcuts to scalars */
513   PetscScalar            one=1.0,m_one=-1.0;
514 
515   PetscFunctionBegin;
516   /* get number of vertices (corners plus constraints with change of basis)
517      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
518   n_vertices = pcbddc->n_actual_vertices;
519   n_constraints = pcbddc->local_primal_size-n_vertices;
520   /* Set Non-overlapping dimensions */
521   n_B = pcis->n_B; n_D = pcis->n - n_B;
522   n_R = pcis->n-n_vertices;
523 
524   /* Set types for local objects needed by BDDC precondtioner */
525   impMatType = MATSEQDENSE;
526 
527   /* vertices in boundary numbering */
528   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
529   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
530   if (i != n_vertices) {
531     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
532   }
533 
534   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
535   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
536   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
537   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
538   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
539   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
540   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
541   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
542   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
543   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
544 
545   unsymmetric_check = PETSC_FALSE;
546   /* allocate workspace */
547   n = 0;
548   if (n_constraints) {
549     n += n_R*n_constraints;
550   }
551   if (n_vertices) {
552     n = PetscMax(2*n_R*n_vertices,n);
553   }
554   if (!pcbddc->issym) {
555     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
556     unsymmetric_check = PETSC_TRUE;
557   }
558   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
559 
560   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
561   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
562   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
563   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
564   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
565   if (isLU || isILU || isCHOL) {
566     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
567   } else {
568     F = NULL;
569   }
570 
571   /* Precompute stuffs needed for preprocessing and application of BDDC*/
572   if (n_constraints) {
573     Mat M1,M2,M3;
574     IS  is_aux;
575     /* see if we can save some allocations */
576     if (pcbddc->local_auxmat2) {
577       PetscInt on_R,on_constraints;
578       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
579       if (on_R != n_R || on_constraints != n_constraints) {
580         PetscScalar *marray;
581 
582         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
583         ierr = PetscFree(marray);CHKERRQ(ierr);
584         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
585         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
586       }
587     }
588     /* auxiliary matrices */
589     if (!pcbddc->local_auxmat2) {
590       PetscScalar *marray;
591 
592       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
593       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
594       marray += n_R*n_constraints;
595       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
596     }
597 
598     /* Extract constraints on R nodes: C_{CR}  */
599     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
600     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
601     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
602 
603     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
604     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
605     for (i=0;i<n_constraints;i++) {
606       const PetscScalar *row_cmat_values;
607       const PetscInt    *row_cmat_indices;
608       PetscInt          size_of_constraint,j;
609 
610       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
611       for (j=0;j<size_of_constraint;j++) {
612         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
613       }
614       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
615     }
616     if (F) {
617       Mat B;
618 
619       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
620       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
621       ierr = MatDestroy(&B);CHKERRQ(ierr);
622     } else {
623       PetscScalar *xarray;
624       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
625       for (i=0;i<n_constraints;i++) {
626         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
627         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
628         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
629         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
630         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
631       }
632       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
633     }
634 
635     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
636     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
637     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
638     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
639     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
640     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
641     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
642     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
643     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
644     ierr = MatDestroy(&M2);CHKERRQ(ierr);
645     ierr = MatDestroy(&M3);CHKERRQ(ierr);
646     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
647     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
648     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
649     ierr = MatDestroy(&M1);CHKERRQ(ierr);
650   }
651   /* Get submatrices from subdomain matrix */
652   if (n_vertices) {
653     Mat       newmat;
654     IS        is_aux;
655     PetscInt  ibs,mbs;
656     PetscBool issbaij;
657 
658     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
659     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
660     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
661     if (ibs != mbs) { /* need to convert to SEQAIJ */
662       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
663       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
664       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
665       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
666       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
667     } else {
668       /* this is safe */
669       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
670       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
671       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
672         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
673         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
674         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
675         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
676         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
677       } else {
678         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
679         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
680       }
681     }
682     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
683   }
684 
685   /* Matrix of coarse basis functions (local) */
686   if (pcbddc->coarse_phi_B) {
687     PetscInt on_B,on_primal,on_D=n_D;
688     if (pcbddc->coarse_phi_D) {
689       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
690     }
691     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
692     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
693       PetscScalar *marray;
694 
695       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
696       ierr = PetscFree(marray);CHKERRQ(ierr);
697       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
698       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
699       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
700       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
701     }
702   }
703 
704   if (!pcbddc->coarse_phi_B) {
705     PetscScalar *marray;
706 
707     n = n_B*pcbddc->local_primal_size;
708     if (pcbddc->switch_static || pcbddc->dbg_flag) {
709       n += n_D*pcbddc->local_primal_size;
710     }
711     if (!pcbddc->issym) {
712       n *= 2;
713     }
714     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
715     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
716     n = n_B*pcbddc->local_primal_size;
717     if (pcbddc->switch_static || pcbddc->dbg_flag) {
718       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
719       n += n_D*pcbddc->local_primal_size;
720     }
721     if (!pcbddc->issym) {
722       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
723       if (pcbddc->switch_static || pcbddc->dbg_flag) {
724         n = n_B*pcbddc->local_primal_size;
725         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
726       }
727     } else {
728       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
729       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
730       if (pcbddc->switch_static || pcbddc->dbg_flag) {
731         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
732         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
733       }
734     }
735   }
736   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
737   /* vertices */
738   if (n_vertices) {
739 
740     if (n_R) {
741       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
742       PetscBLASInt B_N,B_one = 1;
743       PetscScalar  *x,*y;
744 
745       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
746       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
747       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
748       if (F) {
749         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
750       } else {
751         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
752         for (i=0;i<n_vertices;i++) {
753           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
754           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
755           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
756           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
757           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
758         }
759         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
760       }
761       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
762       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
763       if (n_constraints) {
764         Mat B;
765         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
766         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
767         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
768         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
769         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
770         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
771         ierr = MatDestroy(&B);CHKERRQ(ierr);
772       }
773       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
774       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
775       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
776       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
777       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
778       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
779       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
780       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
781       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
782       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
783       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
784       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
785     } else {
786       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
787       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
788     }
789     /* coarse basis functions */
790     for (i=0;i<n_vertices;i++) {
791       PetscScalar *y;
792 
793       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
794       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
795       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
796       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
797       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
798       y[n_B*i+idx_V_B[i]] = 1.0;
799       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
800       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
801 
802       if (pcbddc->switch_static || pcbddc->dbg_flag) {
803         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
804         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
805         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
806         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
807         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
808         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
809       }
810       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
811     }
812     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
813     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
814   }
815 
816   if (n_constraints) {
817     Mat B;
818 
819     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
820     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
821     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
822     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
823     if (n_vertices) {
824       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
825     }
826     ierr = MatDestroy(&B);CHKERRQ(ierr);
827     /* coarse basis functions */
828     for (i=0;i<n_constraints;i++) {
829       PetscScalar *y;
830 
831       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
832       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
833       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
834       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
836       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
837       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
838 
839       if (pcbddc->switch_static || pcbddc->dbg_flag) {
840         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
841         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
842         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
843         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
844         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
845         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
846       }
847       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
848     }
849   }
850 
851   /* compute other basis functions for non-symmetric problems */
852   if (!pcbddc->issym) {
853     Mat B,X;
854 
855     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
856 
857     if (n_constraints) {
858       Mat S_CCT,B_C;
859 
860       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
861       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
862       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
863       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
864       if (n_vertices) {
865         Mat B_V,S_VCT;
866 
867         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
868         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
869         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
870         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
871         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
872       }
873       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
874     }
875     if (n_vertices && n_R) {
876       Mat          A_VRT;
877       PetscBLASInt B_N,B_one = 1;
878 
879       if (!n_constraints) { /* if there are no constraints, reset work */
880         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
881       }
882       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
883       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
884       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
885       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
886       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
887     }
888 
889     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
890     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
891       for (i=0;i<pcbddc->local_primal_size;i++) {
892         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
893         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
894         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
895         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
896         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
897       }
898     } else {
899       for (i=0;i<pcbddc->local_primal_size;i++) {
900         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
901         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
902         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
903         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
904         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
905       }
906     }
907     ierr = MatDestroy(&B);CHKERRQ(ierr);
908     /* coarse basis functions */
909     for (i=0;i<pcbddc->local_primal_size;i++) {
910       PetscScalar *y;
911 
912       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
913       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
914       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
915       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917       if (i<n_vertices) {
918         y[n_B*i+idx_V_B[i]] = 1.0;
919       }
920       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
921       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
922 
923       if (pcbddc->switch_static || pcbddc->dbg_flag) {
924         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
925         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
926         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
927         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
929         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
930       }
931       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
932     }
933     ierr = MatDestroy(&X);CHKERRQ(ierr);
934   }
935 
936   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
937   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
938   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
939   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
940   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
941   /* Checking coarse_sub_mat and coarse basis functios */
942   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
943   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
944   if (pcbddc->dbg_flag) {
945     Mat         coarse_sub_mat;
946     Mat         AUXMAT,TM1,TM2,TM3,TM4;
947     Mat         coarse_phi_D,coarse_phi_B;
948     Mat         coarse_psi_D,coarse_psi_B;
949     Mat         A_II,A_BB,A_IB,A_BI;
950     MatType     checkmattype=MATSEQAIJ;
951     PetscReal   real_value;
952 
953     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
954     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
955     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
956     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
957     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
958     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
959     if (unsymmetric_check) {
960       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
961       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
962     }
963     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
964 
965     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
966     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
967     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
968     if (unsymmetric_check) {
969       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
970       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
971       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
972       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
973       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
974       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
975       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
976       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
977       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
978       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
979       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
980       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
981     } else {
982       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
983       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
984       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
985       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
986       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
987       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
988       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
989       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
990     }
991     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
992     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
993     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
994     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
995     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
996     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
997 
998     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
999     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1000     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1001     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1002     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1003     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1004     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1005     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1006     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1007     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1008     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1009     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1010     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1011     if (unsymmetric_check) {
1012       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1013       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1014     }
1015     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1016   }
1017 
1018   /* free memory */
1019   ierr = PetscFree(work);CHKERRQ(ierr);
1020   if (n_vertices) {
1021     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1022   }
1023   if (n_constraints) {
1024     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1025   }
1026   /* get back data */
1027   *coarse_submat_vals_n = coarse_submat_vals;
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1033 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1034 {
1035   Mat            *work_mat;
1036   IS             isrow_s,iscol_s;
1037   PetscBool      rsorted,csorted;
1038   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1039   PetscErrorCode ierr;
1040 
1041   PetscFunctionBegin;
1042   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1043   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1044   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1045   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1046 
1047   if (!rsorted) {
1048     const PetscInt *idxs;
1049     PetscInt *idxs_sorted,i;
1050 
1051     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1052     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1053     for (i=0;i<rsize;i++) {
1054       idxs_perm_r[i] = i;
1055     }
1056     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1057     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1058     for (i=0;i<rsize;i++) {
1059       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1060     }
1061     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1062     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1063   } else {
1064     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1065     isrow_s = isrow;
1066   }
1067 
1068   if (!csorted) {
1069     if (isrow == iscol) {
1070       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1071       iscol_s = isrow_s;
1072     } else {
1073       const PetscInt *idxs;
1074       PetscInt *idxs_sorted,i;
1075 
1076       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1077       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1078       for (i=0;i<csize;i++) {
1079         idxs_perm_c[i] = i;
1080       }
1081       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1082       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1083       for (i=0;i<csize;i++) {
1084         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1085       }
1086       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1087       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1088     }
1089   } else {
1090     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1091     iscol_s = iscol;
1092   }
1093 
1094   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1095 
1096   if (!rsorted || !csorted) {
1097     Mat      new_mat;
1098     IS       is_perm_r,is_perm_c;
1099 
1100     if (!rsorted) {
1101       PetscInt *idxs_r,i;
1102       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1103       for (i=0;i<rsize;i++) {
1104         idxs_r[idxs_perm_r[i]] = i;
1105       }
1106       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1107       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1108     } else {
1109       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1110     }
1111     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1112 
1113     if (!csorted) {
1114       if (isrow_s == iscol_s) {
1115         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1116         is_perm_c = is_perm_r;
1117       } else {
1118         PetscInt *idxs_c,i;
1119         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1120         for (i=0;i<csize;i++) {
1121           idxs_c[idxs_perm_c[i]] = i;
1122         }
1123         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1124         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1125       }
1126     } else {
1127       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1128     }
1129     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1130 
1131     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1132     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1133     work_mat[0] = new_mat;
1134     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1135     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1136   }
1137 
1138   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1139   *B = work_mat[0];
1140   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1141   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1142   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1148 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1149 {
1150   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1151   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1152   Mat            new_mat;
1153   IS             is_local,is_global;
1154   PetscInt       local_size;
1155   PetscBool      isseqaij;
1156   PetscErrorCode ierr;
1157 
1158   PetscFunctionBegin;
1159   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1160   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1161   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1162   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1163   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1164   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1165   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1166 
1167   /* check */
1168   if (pcbddc->dbg_flag) {
1169     Vec       x,x_change;
1170     PetscReal error;
1171 
1172     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1173     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1174     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1175     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1176     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1177     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1178     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1179     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1180     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1181     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1182     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1183     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1184     ierr = VecDestroy(&x);CHKERRQ(ierr);
1185     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1186   }
1187 
1188   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1189   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1190   if (isseqaij) {
1191     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1192   } else {
1193     Mat work_mat;
1194     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1195     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1196     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1197   }
1198   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1199   /*
1200   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1201   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1202   */
1203   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1209 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1210 {
1211   PC_IS*         pcis = (PC_IS*)(pc->data);
1212   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1213   IS             is_aux1,is_aux2;
1214   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1215   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1216   PetscInt       vbs,bs;
1217   PetscBT        bitmask;
1218   PetscErrorCode ierr;
1219 
1220   PetscFunctionBegin;
1221   /*
1222     No need to setup local scatters if
1223       - primal space is unchanged
1224         AND
1225       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1226         AND
1227       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1228   */
1229   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1230     PetscFunctionReturn(0);
1231   }
1232   /* destroy old objects */
1233   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1234   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1235   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1236   /* Set Non-overlapping dimensions */
1237   n_B = pcis->n_B; n_D = pcis->n - n_B;
1238   n_vertices = pcbddc->n_actual_vertices;
1239   /* create auxiliary bitmask */
1240   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1241   for (i=0;i<n_vertices;i++) {
1242     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1243   }
1244 
1245   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1246   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1247   for (i=0, n_R=0; i<pcis->n; i++) {
1248     if (!PetscBTLookup(bitmask,i)) {
1249       idx_R_local[n_R] = i;
1250       n_R++;
1251     }
1252   }
1253 
1254   /* Block code */
1255   vbs = 1;
1256   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1257   if (bs>1 && !(n_vertices%bs)) {
1258     PetscBool is_blocked = PETSC_TRUE;
1259     PetscInt  *vary;
1260     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1261     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1262     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1263     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1264     for (i=0; i<n_vertices; i++) {
1265       if (vary[i]!=0 && vary[i]!=bs) {
1266         is_blocked = PETSC_FALSE;
1267         break;
1268       }
1269     }
1270     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1271       vbs = bs;
1272       for (i=0;i<n_R/vbs;i++) {
1273         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1274       }
1275     }
1276     ierr = PetscFree(vary);CHKERRQ(ierr);
1277   }
1278   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1279   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1280 
1281   /* print some info if requested */
1282   if (pcbddc->dbg_flag) {
1283     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1284     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1285     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1286     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1287     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1288     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1289     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1290     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1291   }
1292 
1293   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1294   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1295   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1296   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1297   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1298   for (i=0; i<n_D; i++) {
1299     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1300   }
1301   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1302   for (i=0, j=0; i<n_R; i++) {
1303     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1304       aux_array1[j++] = i;
1305     }
1306   }
1307   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1308   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1309   for (i=0, j=0; i<n_B; i++) {
1310     if (!PetscBTLookup(bitmask,is_indices[i])) {
1311       aux_array2[j++] = i;
1312     }
1313   }
1314   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1315   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1316   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1317   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1318   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1319 
1320   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1321     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1322     for (i=0, j=0; i<n_R; i++) {
1323       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1324         aux_array1[j++] = i;
1325       }
1326     }
1327     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1328     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1329     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1330   }
1331   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1332   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1339 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1340 {
1341   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1342   PC_IS          *pcis = (PC_IS*)pc->data;
1343   PC             pc_temp;
1344   Mat            A_RR;
1345   MatReuse       reuse;
1346   PetscScalar    m_one = -1.0;
1347   PetscReal      value;
1348   PetscInt       n_D,n_R,ibs,mbs;
1349   PetscBool      use_exact,use_exact_reduced,issbaij;
1350   PetscErrorCode ierr;
1351   /* prefixes stuff */
1352   char           dir_prefix[256],neu_prefix[256],str_level[16];
1353   size_t         len;
1354 
1355   PetscFunctionBegin;
1356 
1357   /* compute prefixes */
1358   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1359   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1360   if (!pcbddc->current_level) {
1361     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1362     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1363     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1364     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1367     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1368     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1369     len -= 15; /* remove "pc_bddc_coarse_" */
1370     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1371     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1372     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1373     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1374     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1375     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1376     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1377     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1378   }
1379 
1380   /* DIRICHLET PROBLEM */
1381   if (dirichlet) {
1382     /* Matrix for Dirichlet problem is pcis->A_II */
1383     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1384     if (!pcbddc->ksp_D) { /* create object if not yet build */
1385       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1386       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1387       /* default */
1388       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1389       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1390       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1391       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1392       if (issbaij) {
1393         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1394       } else {
1395         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1396       }
1397       /* Allow user's customization */
1398       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1399       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1400     }
1401     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1402     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1403     if (!n_D) {
1404       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1405       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1406     }
1407     /* Set Up KSP for Dirichlet problem of BDDC */
1408     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1409     /* set ksp_D into pcis data */
1410     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1411     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1412     pcis->ksp_D = pcbddc->ksp_D;
1413   }
1414 
1415   /* NEUMANN PROBLEM */
1416   A_RR = 0;
1417   if (neumann) {
1418     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1419     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1420     if (pcbddc->ksp_R) { /* already created ksp */
1421       PetscInt nn_R;
1422       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1423       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1424       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1425       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1426         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1427         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1428         reuse = MAT_INITIAL_MATRIX;
1429       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1430         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1431           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1432           reuse = MAT_INITIAL_MATRIX;
1433         } else { /* safe to reuse the matrix */
1434           reuse = MAT_REUSE_MATRIX;
1435         }
1436       }
1437       /* last check */
1438       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1439         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1440         reuse = MAT_INITIAL_MATRIX;
1441       }
1442     } else { /* first time, so we need to create the matrix */
1443       reuse = MAT_INITIAL_MATRIX;
1444     }
1445     /* extract A_RR */
1446     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1447     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1448     if (ibs != mbs) {
1449       Mat newmat;
1450       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1451       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1452       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1453     } else {
1454       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1455     }
1456     if (!pcbddc->ksp_R) { /* create object if not present */
1457       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1458       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1459       /* default */
1460       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1461       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1462       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1463       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1464       if (issbaij) {
1465         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1466       } else {
1467         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1468       }
1469       /* Allow user's customization */
1470       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1471       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1472     }
1473     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1474     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1475     if (!n_R) {
1476       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1477       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1478     }
1479     /* Set Up KSP for Neumann problem of BDDC */
1480     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1481   }
1482 
1483   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1484   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1485     if (pcbddc->dbg_flag) {
1486       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1487       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1488       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1489     }
1490     if (dirichlet) { /* Dirichlet */
1491       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1492       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1493       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1494       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1495       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1496       /* need to be adapted? */
1497       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1498       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1499       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1500       /* print info */
1501       if (pcbddc->dbg_flag) {
1502         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1503         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1504       }
1505       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1506         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1507       }
1508     }
1509     if (neumann) { /* Neumann */
1510       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1511       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1512       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1513       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1514       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1515       /* need to be adapted? */
1516       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1517       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1518       /* print info */
1519       if (pcbddc->dbg_flag) {
1520         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1521         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1522       }
1523       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1524         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1525       }
1526     }
1527   }
1528   /* free Neumann problem's matrix */
1529   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1535 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1536 {
1537   PetscErrorCode ierr;
1538   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1539 
1540   PetscFunctionBegin;
1541   if (applytranspose) {
1542     if (pcbddc->local_auxmat1) {
1543       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1544       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1545     }
1546     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1547   } else {
1548     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1549     if (pcbddc->local_auxmat1) {
1550       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1551       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1552     }
1553   }
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1558 #undef __FUNCT__
1559 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1560 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1561 {
1562   PetscErrorCode ierr;
1563   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1564   PC_IS*            pcis = (PC_IS*)  (pc->data);
1565   const PetscScalar zero = 0.0;
1566 
1567   PetscFunctionBegin;
1568   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1569   if (applytranspose) {
1570     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1571     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1572   } else {
1573     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1574     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1575   }
1576   /* start communications from local primal nodes to rhs of coarse solver */
1577   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1578   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1580 
1581   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1582   /* TODO remove null space when doing multilevel */
1583   if (pcbddc->coarse_ksp) {
1584     if (applytranspose) {
1585       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1586     } else {
1587       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1588     }
1589   }
1590 
1591   /* Local solution on R nodes */
1592   if (pcis->n) {
1593     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1594     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1595     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1596     if (pcbddc->switch_static) {
1597       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1598       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1599     }
1600     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1601     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1602     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1603     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604     if (pcbddc->switch_static) {
1605       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1607     }
1608   }
1609 
1610   /* communications from coarse sol to local primal nodes */
1611   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1612   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1613 
1614   /* Sum contributions from two levels */
1615   if (applytranspose) {
1616     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1617     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1618   } else {
1619     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1620     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1621   }
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 #undef __FUNCT__
1626 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1627 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1628 {
1629   PetscErrorCode ierr;
1630   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1631   PetscScalar    *array;
1632   Vec            from,to;
1633 
1634   PetscFunctionBegin;
1635   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1636     from = pcbddc->coarse_vec;
1637     to = pcbddc->vec1_P;
1638     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1639       Vec tvec;
1640 
1641       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1642       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1643       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1644       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1645       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1646       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1647     }
1648   } else { /* from local to global -> put data in coarse right hand side */
1649     from = pcbddc->vec1_P;
1650     to = pcbddc->coarse_vec;
1651   }
1652   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1658 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1659 {
1660   PetscErrorCode ierr;
1661   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1662   PetscScalar    *array;
1663   Vec            from,to;
1664 
1665   PetscFunctionBegin;
1666   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1667     from = pcbddc->coarse_vec;
1668     to = pcbddc->vec1_P;
1669   } else { /* from local to global -> put data in coarse right hand side */
1670     from = pcbddc->vec1_P;
1671     to = pcbddc->coarse_vec;
1672   }
1673   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1674   if (smode == SCATTER_FORWARD) {
1675     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1676       Vec tvec;
1677 
1678       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1679       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1680       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1681       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1682     }
1683   } else {
1684     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1685      ierr = VecResetArray(from);CHKERRQ(ierr);
1686     }
1687   }
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 /* uncomment for testing purposes */
1692 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1693 #undef __FUNCT__
1694 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1695 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1696 {
1697   PetscErrorCode    ierr;
1698   PC_IS*            pcis = (PC_IS*)(pc->data);
1699   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1700   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1701   /* one and zero */
1702   PetscScalar       one=1.0,zero=0.0;
1703   /* space to store constraints and their local indices */
1704   PetscScalar       *temp_quadrature_constraint;
1705   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1706   /* iterators */
1707   PetscInt          i,j,k,total_counts,temp_start_ptr;
1708   /* BLAS integers */
1709   PetscBLASInt      lwork,lierr;
1710   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1711   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1712   /* reuse */
1713   PetscInt          olocal_primal_size;
1714   PetscInt          *oprimal_indices_local_idxs;
1715   /* change of basis */
1716   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1717   PetscBool         boolforchange,qr_needed;
1718   PetscBT           touched,change_basis,qr_needed_idx;
1719   /* auxiliary stuff */
1720   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1721   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1722   PetscScalar       *temp_quadrature_constraint_work=NULL;
1723   /* some quantities */
1724   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1725   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1726 
1727   PetscFunctionBegin;
1728   /* Destroy Mat objects computed previously */
1729   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1730   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1731 
1732   /* print some info */
1733   if (pcbddc->dbg_flag) {
1734     IS       vertices;
1735     PetscInt nv,nedges,nfaces;
1736     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1737     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1738     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1739     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1740     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1741     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1742     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_edges);CHKERRQ(ierr);
1743     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_faces);CHKERRQ(ierr);
1744     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1745   }
1746 
1747   if (!pcbddc->adaptive_selection) {
1748     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1749     MatNullSpace nearnullsp;
1750     const Vec    *nearnullvecs;
1751     Vec          *localnearnullsp;
1752     PetscScalar  *array;
1753     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1754     PetscBool    nnsp_has_cnst;
1755     /* LAPACK working arrays for SVD or POD */
1756     PetscBool    skip_lapack;
1757     PetscScalar  *work;
1758     PetscReal    *singular_vals;
1759 #if defined(PETSC_USE_COMPLEX)
1760     PetscReal    *rwork;
1761 #endif
1762 #if defined(PETSC_MISSING_LAPACK_GESVD)
1763     PetscScalar  *temp_basis,*correlation_mat;
1764 #else
1765     PetscBLASInt dummy_int;
1766     PetscScalar  dummy_scalar;
1767 #endif
1768 
1769     /* Get index sets for faces, edges and vertices from graph */
1770     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1771     /* free unneeded index sets */
1772     if (!pcbddc->use_vertices) {
1773       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1774     }
1775     if (!pcbddc->use_edges) {
1776       for (i=0;i<n_ISForEdges;i++) {
1777         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1778       }
1779       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1780       n_ISForEdges = 0;
1781     }
1782     if (!pcbddc->use_faces) {
1783       for (i=0;i<n_ISForFaces;i++) {
1784         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1785       }
1786       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1787       n_ISForFaces = 0;
1788     }
1789     /* HACKS (the following two blocks of code) */
1790     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1791       pcbddc->use_change_of_basis = PETSC_TRUE;
1792       if (!ISForEdges) {
1793         pcbddc->use_change_on_faces = PETSC_TRUE;
1794       }
1795     }
1796     if (pcbddc->NullSpace) {
1797       /* use_change_of_basis should be consistent among processors */
1798       PetscBool tbool[2],gbool[2];
1799       tbool [0] = pcbddc->use_change_of_basis;
1800       tbool [1] = pcbddc->use_change_on_faces;
1801       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1802       pcbddc->use_change_of_basis = gbool[0];
1803       pcbddc->use_change_on_faces = gbool[1];
1804     }
1805 
1806     /* check if near null space is attached to global mat */
1807     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1808     if (nearnullsp) {
1809       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1810       /* remove any stored info */
1811       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1812       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1813       /* store information for BDDC solver reuse */
1814       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1815       pcbddc->onearnullspace = nearnullsp;
1816       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1817       for (i=0;i<nnsp_size;i++) {
1818         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1819       }
1820     } else { /* if near null space is not provided BDDC uses constants by default */
1821       nnsp_size = 0;
1822       nnsp_has_cnst = PETSC_TRUE;
1823     }
1824     /* get max number of constraints on a single cc */
1825     max_constraints = nnsp_size;
1826     if (nnsp_has_cnst) max_constraints++;
1827 
1828     /*
1829          Evaluate maximum storage size needed by the procedure
1830          - temp_indices will contain start index of each constraint stored as follows
1831          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1832          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1833          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1834                                                                                                                                                            */
1835     total_counts = n_ISForFaces+n_ISForEdges;
1836     total_counts *= max_constraints;
1837     n_vertices = 0;
1838     if (ISForVertices) {
1839       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1840     }
1841     total_counts += n_vertices;
1842     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1843     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1844     total_counts = 0;
1845     max_size_of_constraint = 0;
1846     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1847       if (i<n_ISForEdges) {
1848         used_IS = &ISForEdges[i];
1849       } else {
1850         used_IS = &ISForFaces[i-n_ISForEdges];
1851       }
1852       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1853       total_counts += j;
1854       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1855     }
1856     total_counts *= max_constraints;
1857     total_counts += n_vertices;
1858     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1859     /* get local part of global near null space vectors */
1860     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1861     for (k=0;k<nnsp_size;k++) {
1862       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1863       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1864       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1865     }
1866 
1867     /* whether or not to skip lapack calls */
1868     skip_lapack = PETSC_TRUE;
1869     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1870 
1871     /* allocate some auxiliary stuff */
1872     if (!skip_lapack || pcbddc->use_qr_single) {
1873       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1874     }
1875 
1876     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1877     if (!skip_lapack) {
1878       PetscScalar temp_work;
1879 
1880 #if defined(PETSC_MISSING_LAPACK_GESVD)
1881       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1882       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1883       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1884       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1885 #if defined(PETSC_USE_COMPLEX)
1886       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1887 #endif
1888       /* now we evaluate the optimal workspace using query with lwork=-1 */
1889       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1890       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1891       lwork = -1;
1892       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1893 #if !defined(PETSC_USE_COMPLEX)
1894       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1895 #else
1896       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1897 #endif
1898       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1899       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1900 #else /* on missing GESVD */
1901       /* SVD */
1902       PetscInt max_n,min_n;
1903       max_n = max_size_of_constraint;
1904       min_n = max_constraints;
1905       if (max_size_of_constraint < max_constraints) {
1906         min_n = max_size_of_constraint;
1907         max_n = max_constraints;
1908       }
1909       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1910 #if defined(PETSC_USE_COMPLEX)
1911       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1912 #endif
1913       /* now we evaluate the optimal workspace using query with lwork=-1 */
1914       lwork = -1;
1915       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1916       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1917       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1918       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1919 #if !defined(PETSC_USE_COMPLEX)
1920       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1921 #else
1922       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1923 #endif
1924       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1925       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1926 #endif /* on missing GESVD */
1927       /* Allocate optimal workspace */
1928       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1929       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1930     }
1931     /* Now we can loop on constraining sets */
1932     total_counts = 0;
1933     temp_indices[0] = 0;
1934     /* vertices */
1935     if (ISForVertices) {
1936       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1937       if (nnsp_has_cnst) { /* consider all vertices */
1938         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1939         for (i=0;i<n_vertices;i++) {
1940           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1941           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1942           total_counts++;
1943         }
1944       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1945         PetscBool used_vertex;
1946         for (i=0;i<n_vertices;i++) {
1947           used_vertex = PETSC_FALSE;
1948           k = 0;
1949           while (!used_vertex && k<nnsp_size) {
1950             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1951             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1952               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1953               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1954               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1955               total_counts++;
1956               used_vertex = PETSC_TRUE;
1957             }
1958             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1959             k++;
1960           }
1961         }
1962       }
1963       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1964       n_vertices = total_counts;
1965     }
1966 
1967     /* edges and faces */
1968     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1969       if (ncc<n_ISForEdges) {
1970         used_IS = &ISForEdges[ncc];
1971         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1972       } else {
1973         used_IS = &ISForFaces[ncc-n_ISForEdges];
1974         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1975       }
1976       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1977       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1978       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1979       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1980       /* change of basis should not be performed on local periodic nodes */
1981       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1982       if (nnsp_has_cnst) {
1983         PetscScalar quad_value;
1984         temp_constraints++;
1985         if (!pcbddc->use_nnsp_true) {
1986           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1987         } else {
1988           quad_value = 1.0;
1989         }
1990         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1991         for (j=0;j<size_of_constraint;j++) {
1992           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1993         }
1994         /* sort by global ordering if using lapack subroutines (not needed!) */
1995         if (!skip_lapack || pcbddc->use_qr_single) {
1996           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1997           for (j=0;j<size_of_constraint;j++) {
1998             permutation[j]=j;
1999           }
2000           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2001           for (j=0;j<size_of_constraint;j++) {
2002             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2003           }
2004           for (j=0;j<size_of_constraint;j++) {
2005             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2006             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2007           }
2008           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2009           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2010         }
2011         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2012         total_counts++;
2013       }
2014       for (k=0;k<nnsp_size;k++) {
2015         PetscReal real_value;
2016         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2017         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2018         for (j=0;j<size_of_constraint;j++) {
2019           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2020         }
2021         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2022         /* check if array is null on the connected component */
2023         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2024         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2025         if (real_value > 0.0) { /* keep indices and values */
2026           /* sort by global ordering if using lapack subroutines */
2027           if (!skip_lapack || pcbddc->use_qr_single) {
2028             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2029             for (j=0;j<size_of_constraint;j++) {
2030               permutation[j]=j;
2031             }
2032             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2033             for (j=0;j<size_of_constraint;j++) {
2034               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2035               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2036             }
2037             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2038             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2039           }
2040           temp_constraints++;
2041           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2042           total_counts++;
2043         }
2044       }
2045       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2046       valid_constraints = temp_constraints;
2047       if (!pcbddc->use_nnsp_true && temp_constraints) {
2048         if (temp_constraints == 1) { /* just normalize the constraint */
2049           PetscScalar norm;
2050           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2051           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2052           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2053           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2054         } else { /* perform SVD */
2055           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2056 
2057 #if defined(PETSC_MISSING_LAPACK_GESVD)
2058           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2059              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2060              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2061                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2062                 from that computed using LAPACKgesvd
2063              -> This is due to a different computation of eigenvectors in LAPACKheev
2064              -> The quality of the POD-computed basis will be the same */
2065           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2066           /* Store upper triangular part of correlation matrix */
2067           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2068           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2069           for (j=0;j<temp_constraints;j++) {
2070             for (k=0;k<j+1;k++) {
2071               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2072             }
2073           }
2074           /* compute eigenvalues and eigenvectors of correlation matrix */
2075           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2076           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2077 #if !defined(PETSC_USE_COMPLEX)
2078           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2079 #else
2080           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2081 #endif
2082           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2083           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2084           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2085           j = 0;
2086           while (j < temp_constraints && singular_vals[j] < tol) j++;
2087           total_counts = total_counts-j;
2088           valid_constraints = temp_constraints-j;
2089           /* scale and copy POD basis into used quadrature memory */
2090           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2091           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2092           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2093           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2094           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2095           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2096           if (j<temp_constraints) {
2097             PetscInt ii;
2098             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2099             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2100             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2101             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2102             for (k=0;k<temp_constraints-j;k++) {
2103               for (ii=0;ii<size_of_constraint;ii++) {
2104                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2105               }
2106             }
2107           }
2108 #else  /* on missing GESVD */
2109           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2110           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2111           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2112           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2113 #if !defined(PETSC_USE_COMPLEX)
2114           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2115 #else
2116           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2117 #endif
2118           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2119           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2120           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2121           k = temp_constraints;
2122           if (k > size_of_constraint) k = size_of_constraint;
2123           j = 0;
2124           while (j < k && singular_vals[k-j-1] < tol) j++;
2125           valid_constraints = k-j;
2126           total_counts = total_counts-temp_constraints+valid_constraints;
2127 #endif /* on missing GESVD */
2128         }
2129       }
2130       /* setting change_of_basis flag is safe now */
2131       if (boolforchange) {
2132         for (j=0;j<valid_constraints;j++) {
2133           PetscBTSet(change_basis,total_counts-j-1);
2134         }
2135       }
2136     }
2137     /* free workspace */
2138     if (!skip_lapack || pcbddc->use_qr_single) {
2139       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2140     }
2141     if (!skip_lapack) {
2142       ierr = PetscFree(work);CHKERRQ(ierr);
2143 #if defined(PETSC_USE_COMPLEX)
2144       ierr = PetscFree(rwork);CHKERRQ(ierr);
2145 #endif
2146       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2147 #if defined(PETSC_MISSING_LAPACK_GESVD)
2148       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2149       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2150 #endif
2151     }
2152     for (k=0;k<nnsp_size;k++) {
2153       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2154     }
2155     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2156     /* free index sets of faces, edges and vertices */
2157     for (i=0;i<n_ISForFaces;i++) {
2158       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2159     }
2160     if (n_ISForFaces) {
2161       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2162     }
2163     for (i=0;i<n_ISForEdges;i++) {
2164       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2165     }
2166     if (n_ISForEdges) {
2167       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2168     }
2169     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2170   } else {
2171     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2172     PetscInt        cum = 0;
2173 
2174     total_counts = 0;
2175     n_vertices = 0;
2176     if (sub_schurs->is_Ej_com) {
2177       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2178     }
2179     max_constraints = 0;
2180     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2181       total_counts += pcbddc->adaptive_constraints_n[i];
2182       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2183     }
2184     temp_indices = pcbddc->adaptive_constraints_ptrs;
2185     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2186     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2187 
2188 #if 0
2189     printf("Found %d totals\n",total_counts);
2190     for (i=0;i<total_counts;i++) {
2191       printf("const %d, start %d",i,temp_indices[i]);
2192       printf(" end %d:\n",temp_indices[i+1]);
2193       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2194         printf("  idxs %d",temp_indices_to_constraint[j]);
2195         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2196       }
2197     }
2198     for (i=0;i<n_vertices;i++) {
2199       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2200     }
2201     for (i=0;i<sub_schurs->n_subs;i++) {
2202       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2203     }
2204 #endif
2205 
2206     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2207     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2208     /* Change of basis */
2209     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2210     if (pcbddc->use_change_of_basis) {
2211       cum = n_vertices;
2212       for (i=0;i<sub_schurs->n_subs;i++) {
2213         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2214           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2215             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2216           }
2217         }
2218         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2219       }
2220     }
2221   }
2222 
2223   /* map temp_indices_to_constraint in boundary numbering */
2224   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2225   if (i != temp_indices[total_counts]) {
2226     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2227   }
2228 
2229   /* set quantities in pcbddc data structure and store previous primal size */
2230   /* n_vertices defines the number of subdomain corners in the primal space */
2231   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2232   olocal_primal_size = pcbddc->local_primal_size;
2233   pcbddc->local_primal_size = total_counts;
2234   pcbddc->n_vertices = n_vertices;
2235   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2236 
2237   /* Create constraint matrix */
2238   /* The constraint matrix is used to compute the l2g map of primal dofs */
2239   /* so we need to set it up properly either with or without change of basis */
2240   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2241   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2242   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2243   /* array to compute a local numbering of constraints : vertices first then constraints */
2244   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2245   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2246   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2247   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2248   /* auxiliary stuff for basis change */
2249   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2250   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2251 
2252   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2253   total_primal_vertices=0;
2254   for (i=0;i<pcbddc->local_primal_size;i++) {
2255     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2256     if (size_of_constraint == 1) {
2257       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2258       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2259       aux_primal_minloc[total_primal_vertices]=0;
2260       total_primal_vertices++;
2261     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2262       PetscInt min_loc,min_index;
2263       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2264       /* find first untouched local node */
2265       k = 0;
2266       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2267       min_index = global_indices[k];
2268       min_loc = k;
2269       /* search the minimum among global nodes already untouched on the cc */
2270       for (k=1;k<size_of_constraint;k++) {
2271         /* there can be more than one constraint on a single connected component */
2272         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2273           min_index = global_indices[k];
2274           min_loc = k;
2275         }
2276       }
2277       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2278       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2279       aux_primal_minloc[total_primal_vertices]=min_loc;
2280       total_primal_vertices++;
2281     }
2282   }
2283   /* determine if a QR strategy is needed for change of basis */
2284   qr_needed = PETSC_FALSE;
2285   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2286   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2287     if (PetscBTLookup(change_basis,i)) {
2288       if (!pcbddc->use_qr_single) {
2289         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2290         j = 0;
2291         for (k=0;k<size_of_constraint;k++) {
2292           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2293             j++;
2294           }
2295         }
2296         /* found more than one primal dof on the cc */
2297         if (j > 1) {
2298           PetscBTSet(qr_needed_idx,i);
2299           qr_needed = PETSC_TRUE;
2300         }
2301       } else {
2302         PetscBTSet(qr_needed_idx,i);
2303         qr_needed = PETSC_TRUE;
2304       }
2305     }
2306   }
2307   /* free workspace */
2308   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2309 
2310   /* permute indices in order to have a sorted set of vertices */
2311   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2312 
2313   /* nonzero structure of constraint matrix */
2314   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2315   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2316   j=total_primal_vertices;
2317   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2318     if (!PetscBTLookup(change_basis,i)) {
2319       nnz[j]=temp_indices[i+1]-temp_indices[i];
2320       j++;
2321     }
2322   }
2323   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2324   ierr = PetscFree(nnz);CHKERRQ(ierr);
2325   /* set values in constraint matrix */
2326   for (i=0;i<total_primal_vertices;i++) {
2327     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2328   }
2329   total_counts = total_primal_vertices;
2330   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2331     if (!PetscBTLookup(change_basis,i)) {
2332       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2333       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2334       total_counts++;
2335     }
2336   }
2337   /* assembling */
2338   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2339   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340   /*
2341   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2342   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2343   */
2344   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2345   if (pcbddc->use_change_of_basis) {
2346     /* dual and primal dofs on a single cc */
2347     PetscInt     dual_dofs,primal_dofs;
2348     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2349     PetscInt     primal_counter;
2350     /* working stuff for GEQRF */
2351     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2352     PetscBLASInt lqr_work;
2353     /* working stuff for UNGQR */
2354     PetscScalar  *gqr_work,lgqr_work_t;
2355     PetscBLASInt lgqr_work;
2356     /* working stuff for TRTRS */
2357     PetscScalar  *trs_rhs;
2358     PetscBLASInt Blas_NRHS;
2359     /* pointers for values insertion into change of basis matrix */
2360     PetscInt     *start_rows,*start_cols;
2361     PetscScalar  *start_vals;
2362     /* working stuff for values insertion */
2363     PetscBT      is_primal;
2364     /* matrix sizes */
2365     PetscInt     global_size,local_size;
2366     /* temporary change of basis */
2367     Mat          localChangeOfBasisMatrix;
2368     /* extra space for debugging */
2369     PetscScalar  *dbg_work;
2370 
2371     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2372     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2373     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2374     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2375     /* nonzeros for local mat */
2376     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2377     for (i=0;i<pcis->n;i++) nnz[i]=1;
2378     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2379       if (PetscBTLookup(change_basis,i)) {
2380         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2381         if (PetscBTLookup(qr_needed_idx,i)) {
2382           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2383         } else {
2384           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2385           /* get local primal index on the cc */
2386           j = 0;
2387           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2388           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2389         }
2390       }
2391     }
2392     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2393     ierr = PetscFree(nnz);CHKERRQ(ierr);
2394     /* Set initial identity in the matrix */
2395     for (i=0;i<pcis->n;i++) {
2396       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2397     }
2398 
2399     if (pcbddc->dbg_flag) {
2400       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2401       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2402     }
2403 
2404 
2405     /* Now we loop on the constraints which need a change of basis */
2406     /*
2407        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2408        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2409 
2410        Basic blocks of change of basis matrix T computed by
2411 
2412           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2413 
2414             | 1        0   ...        0         s_1/S |
2415             | 0        1   ...        0         s_2/S |
2416             |              ...                        |
2417             | 0        ...            1     s_{n-1}/S |
2418             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2419 
2420             with S = \sum_{i=1}^n s_i^2
2421             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2422                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2423 
2424           - QR decomposition of constraints otherwise
2425     */
2426     if (qr_needed) {
2427       /* space to store Q */
2428       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2429       /* first we issue queries for optimal work */
2430       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2431       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2432       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2433       lqr_work = -1;
2434       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2435       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2436       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2437       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2438       lgqr_work = -1;
2439       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2440       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2441       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2442       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2443       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2444       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2445       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2446       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2447       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2448       /* array to store scaling factors for reflectors */
2449       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2450       /* array to store rhs and solution of triangular solver */
2451       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2452       /* allocating workspace for check */
2453       if (pcbddc->dbg_flag) {
2454         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2455       }
2456     }
2457     /* array to store whether a node is primal or not */
2458     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2459     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2460     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2461     if (i != total_primal_vertices) {
2462       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2463     }
2464     for (i=0;i<total_primal_vertices;i++) {
2465       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2466     }
2467     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2468 
2469     /* loop on constraints and see whether or not they need a change of basis and compute it */
2470     /* -> using implicit ordering contained in temp_indices data */
2471     total_counts = pcbddc->n_vertices;
2472     primal_counter = total_counts;
2473     while (total_counts<pcbddc->local_primal_size) {
2474       primal_dofs = 1;
2475       if (PetscBTLookup(change_basis,total_counts)) {
2476         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2477         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2478           primal_dofs++;
2479         }
2480         /* get constraint info */
2481         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2482         dual_dofs = size_of_constraint-primal_dofs;
2483 
2484         if (pcbddc->dbg_flag) {
2485           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2486         }
2487 
2488         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2489 
2490           /* copy quadrature constraints for change of basis check */
2491           if (pcbddc->dbg_flag) {
2492             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2493           }
2494           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2495           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2496 
2497           /* compute QR decomposition of constraints */
2498           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2499           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2500           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2501           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2502           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2503           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2504           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2505 
2506           /* explictly compute R^-T */
2507           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2508           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2509           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2510           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2511           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2512           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2513           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2514           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2515           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2516           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2517 
2518           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2519           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2520           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2521           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2522           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2523           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2524           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2525           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2526           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2527 
2528           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2529              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2530              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2531           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2532           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2533           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2534           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2535           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2536           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2537           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2538           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2539           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2540           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2541 
2542           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2543           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2544           /* insert cols for primal dofs */
2545           for (j=0;j<primal_dofs;j++) {
2546             start_vals = &qr_basis[j*size_of_constraint];
2547             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2548             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2549           }
2550           /* insert cols for dual dofs */
2551           for (j=0,k=0;j<dual_dofs;k++) {
2552             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2553               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2554               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2555               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2556               j++;
2557             }
2558           }
2559 
2560           /* check change of basis */
2561           if (pcbddc->dbg_flag) {
2562             PetscInt   ii,jj;
2563             PetscBool valid_qr=PETSC_TRUE;
2564             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2565             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2566             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2567             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2568             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2569             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2570             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2571             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2572             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2573             for (jj=0;jj<size_of_constraint;jj++) {
2574               for (ii=0;ii<primal_dofs;ii++) {
2575                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2576                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2577               }
2578             }
2579             if (!valid_qr) {
2580               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2581               for (jj=0;jj<size_of_constraint;jj++) {
2582                 for (ii=0;ii<primal_dofs;ii++) {
2583                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2584                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2585                   }
2586                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2587                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2588                   }
2589                 }
2590               }
2591             } else {
2592               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2593             }
2594           }
2595         } else { /* simple transformation block */
2596           PetscInt    row,col;
2597           PetscScalar val,norm;
2598 
2599           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2600           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2601           for (j=0;j<size_of_constraint;j++) {
2602             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2603             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2604             if (!PetscBTLookup(is_primal,row_B)) {
2605               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2606               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2607               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2608             } else {
2609               for (k=0;k<size_of_constraint;k++) {
2610                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2611                 if (row != col) {
2612                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2613                 } else {
2614                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2615                 }
2616                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2617               }
2618             }
2619           }
2620           if (pcbddc->dbg_flag) {
2621             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2622           }
2623         }
2624         /* increment primal counter */
2625         primal_counter += primal_dofs;
2626       } else {
2627         if (pcbddc->dbg_flag) {
2628           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2629         }
2630       }
2631       /* increment constraint counter total_counts */
2632       total_counts += primal_dofs;
2633     }
2634 
2635     /* free workspace */
2636     if (qr_needed) {
2637       if (pcbddc->dbg_flag) {
2638         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2639       }
2640       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2641       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2642       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2643       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2644       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2645     }
2646     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2647     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2648     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2649 
2650     /* assembling of global change of variable */
2651     {
2652       Mat      tmat;
2653       PetscInt bs;
2654 
2655       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2656       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2657       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2658       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2659       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2660       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2661       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2662       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2663       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2664       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2665       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2666       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2667       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2668       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2669       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2670       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2671       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2672       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2673     }
2674     /* check */
2675     if (pcbddc->dbg_flag) {
2676       PetscReal error;
2677       Vec       x,x_change;
2678 
2679       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2680       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2681       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2682       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2683       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2684       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2685       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2686       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2687       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2688       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2689       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2690       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2691       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2692       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2693       ierr = VecDestroy(&x);CHKERRQ(ierr);
2694       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2695     }
2696 
2697     /* adapt sub_schurs computed (if any) */
2698     if (pcbddc->use_deluxe_scaling) {
2699       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2700       if (sub_schurs->n_subs_par_g) {
2701         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2702       }
2703       if (sub_schurs->S_Ej_all) {
2704         Mat S_1,S_2,tmat;
2705         IS is_all_N;
2706 
2707         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2708         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2709         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2710         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2711         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2712         sub_schurs->S_Ej_all = S_1;
2713         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2714         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2715         sub_schurs->sum_S_Ej_all = S_2;
2716         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2717       }
2718     }
2719     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2720   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2721     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2722     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2723   }
2724 
2725   /* set up change of basis context */
2726   if (pcbddc->ChangeOfBasisMatrix) {
2727     PCBDDCChange_ctx change_ctx;
2728 
2729     if (!pcbddc->new_global_mat) {
2730       PetscInt global_size,local_size;
2731 
2732       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2733       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2734       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2735       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2736       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2737       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2738       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2739       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2740       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2741     } else {
2742       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2743       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2744       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2745     }
2746     if (!pcbddc->user_ChangeOfBasisMatrix) {
2747       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2748       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2749     } else {
2750       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2751       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2752     }
2753     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2754     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2755     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2756     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2757   }
2758 
2759   /* get indices in local ordering for vertices and constraints */
2760   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2761     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2762     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2763   }
2764   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2765   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2766   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2767   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2768   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2769   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2770   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2771   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2772   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2773   /* set quantities in PCBDDC data struct */
2774   pcbddc->n_actual_vertices = i;
2775   /* check if a new primal space has been introduced */
2776   pcbddc->new_primal_space_local = PETSC_TRUE;
2777   if (olocal_primal_size == pcbddc->local_primal_size) {
2778     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2779     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2780     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2781   }
2782   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2783   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2784 
2785   /* flush dbg viewer */
2786   if (pcbddc->dbg_flag) {
2787     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2788   }
2789 
2790   /* free workspace */
2791   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2792   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2793   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2794   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2795   if (!pcbddc->adaptive_selection) {
2796     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2797     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2798   } else {
2799     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2800                       pcbddc->adaptive_constraints_ptrs,
2801                       pcbddc->adaptive_constraints_idxs,
2802                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2803     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2804   }
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 #undef __FUNCT__
2809 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2810 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2811 {
2812   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2813   PC_IS       *pcis = (PC_IS*)pc->data;
2814   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2815   PetscInt    ierr,i,vertex_size;
2816   PetscViewer viewer=pcbddc->dbg_viewer;
2817 
2818   PetscFunctionBegin;
2819   /* Reset previously computed graph */
2820   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2821   /* Init local Graph struct */
2822   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2823 
2824   /* Check validity of the csr graph passed in by the user */
2825   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2826     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2827   }
2828 
2829   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2830   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2831     PetscInt *xadj,*adjncy;
2832     PetscInt nvtxs;
2833 
2834     if (pcbddc->use_local_adj) {
2835       PetscBool flg_row=PETSC_FALSE;
2836 
2837       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2838       if (flg_row) {
2839         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2840         pcbddc->computed_rowadj = PETSC_TRUE;
2841       }
2842       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2843     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2844       IS                     is_dummy;
2845       ISLocalToGlobalMapping l2gmap_dummy;
2846       PetscInt               j,sum;
2847       PetscInt               *cxadj,*cadjncy;
2848       const PetscInt         *idxs;
2849       PCBDDCGraph            graph;
2850       PetscBT                is_on_boundary;
2851 
2852       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2853       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2854       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2855       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2856       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2857       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2858       graph->xadj = xadj;
2859       graph->adjncy = adjncy;
2860       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2861       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2862 
2863       if (pcbddc->dbg_flag) {
2864         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2865         for (i=0;i<graph->ncc;i++) {
2866           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2867         }
2868         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2869       }
2870 
2871       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2872       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2873       for (i=0;i<pcis->n_B;i++) {
2874         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2875       }
2876       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2877 
2878       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2879       sum = 0;
2880       for (i=0;i<graph->ncc;i++) {
2881         PetscInt sizecc = 0;
2882         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2883           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2884             sizecc++;
2885           }
2886         }
2887         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2888           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2889             cxadj[graph->queue[j]] = sizecc;
2890           }
2891         }
2892         sum += sizecc*sizecc;
2893       }
2894       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2895       sum = 0;
2896       for (i=0;i<nvtxs;i++) {
2897         PetscInt temp = cxadj[i];
2898         cxadj[i] = sum;
2899         sum += temp;
2900       }
2901       cxadj[nvtxs] = sum;
2902       for (i=0;i<graph->ncc;i++) {
2903         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2904           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2905             PetscInt k,sizecc = 0;
2906             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2907               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2908                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2909                 sizecc++;
2910               }
2911             }
2912           }
2913         }
2914       }
2915       if (nvtxs) {
2916         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2917       } else {
2918         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2919         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2920       }
2921       graph->xadj = 0;
2922       graph->adjncy = 0;
2923       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2924       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2925     }
2926   }
2927 
2928   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2929   vertex_size = 1;
2930   if (pcbddc->user_provided_isfordofs) {
2931     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2932       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2933       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2934         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2935         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2936       }
2937       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2938       pcbddc->n_ISForDofs = 0;
2939       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2940     }
2941     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2942     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2943   } else {
2944     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2945       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2946       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2947       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2948         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2949       }
2950     }
2951   }
2952 
2953   /* Setup of Graph */
2954   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2955     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2956   }
2957   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2958     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2959   }
2960   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2961 
2962   /* Graph's connected components analysis */
2963   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2964 
2965   /* print some info to stdout */
2966   if (pcbddc->dbg_flag) {
2967     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2968   }
2969 
2970   /* mark topography has done */
2971   pcbddc->recompute_topography = PETSC_FALSE;
2972   PetscFunctionReturn(0);
2973 }
2974 
2975 #undef __FUNCT__
2976 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2977 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2978 {
2979   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2980   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2981   PetscErrorCode ierr;
2982 
2983   PetscFunctionBegin;
2984   n = 0;
2985   vertices = 0;
2986   if (pcbddc->ConstraintMatrix) {
2987     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2988     for (i=0;i<local_primal_size;i++) {
2989       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2990       if (size_of_constraint == 1) n++;
2991       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2992     }
2993     if (vertices_idx) {
2994       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2995       n = 0;
2996       for (i=0;i<local_primal_size;i++) {
2997         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2998         if (size_of_constraint == 1) {
2999           vertices[n++]=row_cmat_indices[0];
3000         }
3001         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3002       }
3003     }
3004   }
3005   *n_vertices = n;
3006   if (vertices_idx) *vertices_idx = vertices;
3007   PetscFunctionReturn(0);
3008 }
3009 
3010 #undef __FUNCT__
3011 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3012 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3013 {
3014   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3015   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3016   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3017   PetscBT        touched;
3018   PetscErrorCode ierr;
3019 
3020     /* This function assumes that the number of local constraints per connected component
3021        is not greater than the number of nodes defined for the connected component
3022        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3023   PetscFunctionBegin;
3024   n = 0;
3025   constraints_index = 0;
3026   if (pcbddc->ConstraintMatrix) {
3027     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3028     max_size_of_constraint = 0;
3029     for (i=0;i<local_primal_size;i++) {
3030       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3031       if (size_of_constraint > 1) {
3032         n++;
3033       }
3034       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3035       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3036     }
3037     if (constraints_idx) {
3038       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3039       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3040       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3041       n = 0;
3042       for (i=0;i<local_primal_size;i++) {
3043         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3044         if (size_of_constraint > 1) {
3045           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3046           /* find first untouched local node */
3047           j = 0;
3048           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3049           min_index = row_cmat_global_indices[j];
3050           min_loc = j;
3051           /* search the minimum among nodes not yet touched on the connected component
3052              since there can be more than one constraint on a single cc */
3053           for (j=1;j<size_of_constraint;j++) {
3054             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3055               min_index = row_cmat_global_indices[j];
3056               min_loc = j;
3057             }
3058           }
3059           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3060           constraints_index[n++] = row_cmat_indices[min_loc];
3061         }
3062         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3063       }
3064       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3065       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3066     }
3067   }
3068   *n_constraints = n;
3069   if (constraints_idx) *constraints_idx = constraints_index;
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 #undef __FUNCT__
3074 #define __FUNCT__ "PCBDDCSubsetNumbering"
3075 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3076 {
3077   Vec            local_vec,global_vec;
3078   IS             seqis,paris;
3079   VecScatter     scatter_ctx;
3080   PetscScalar    *array;
3081   PetscInt       *temp_global_dofs;
3082   PetscScalar    globalsum;
3083   PetscInt       i,j,s;
3084   PetscInt       nlocals,first_index,old_index,max_local;
3085   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3086   PetscMPIInt    *dof_sizes,*dof_displs;
3087   PetscBool      first_found;
3088   PetscErrorCode ierr;
3089 
3090   PetscFunctionBegin;
3091   /* mpi buffers */
3092   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3093   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3094   j = ( !rank_prec_comm ? size_prec_comm : 0);
3095   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3096   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3097   /* get maximum size of subset */
3098   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3099   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3100   max_local = 0;
3101   for (i=0;i<n_local_dofs;i++) {
3102     if (max_local < temp_global_dofs[i] ) {
3103       max_local = temp_global_dofs[i];
3104     }
3105   }
3106   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3107   max_global++;
3108   max_local = 0;
3109   for (i=0;i<n_local_dofs;i++) {
3110     if (max_local < local_dofs[i] ) {
3111       max_local = local_dofs[i];
3112     }
3113   }
3114   max_local++;
3115   /* allocate workspace */
3116   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3117   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3118   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3119   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3120   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3121   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3122   /* create scatter */
3123   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3124   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3125   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3126   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3127   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3128   /* init array */
3129   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3130   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3131   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3132   if (local_dofs_mult) {
3133     for (i=0;i<n_local_dofs;i++) {
3134       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3135     }
3136   } else {
3137     for (i=0;i<n_local_dofs;i++) {
3138       array[local_dofs[i]]=1.0;
3139     }
3140   }
3141   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3142   /* scatter into global vec and get total number of global dofs */
3143   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3144   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3145   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3146   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3147   /* Fill global_vec with cumulative function for global numbering */
3148   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3149   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3150   nlocals = 0;
3151   first_index = -1;
3152   first_found = PETSC_FALSE;
3153   for (i=0;i<s;i++) {
3154     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3155       first_found = PETSC_TRUE;
3156       first_index = i;
3157     }
3158     nlocals += (PetscInt)PetscRealPart(array[i]);
3159   }
3160   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3161   if (!rank_prec_comm) {
3162     dof_displs[0]=0;
3163     for (i=1;i<size_prec_comm;i++) {
3164       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3165     }
3166   }
3167   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3168   if (first_found) {
3169     array[first_index] += (PetscScalar)nlocals;
3170     old_index = first_index;
3171     for (i=first_index+1;i<s;i++) {
3172       if (PetscRealPart(array[i]) > 0.1) {
3173         array[i] += array[old_index];
3174         old_index = i;
3175       }
3176     }
3177   }
3178   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3179   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3180   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3181   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3182   /* get global ordering of local dofs */
3183   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3184   if (local_dofs_mult) {
3185     for (i=0;i<n_local_dofs;i++) {
3186       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3187     }
3188   } else {
3189     for (i=0;i<n_local_dofs;i++) {
3190       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3191     }
3192   }
3193   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3194   /* free workspace */
3195   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3196   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3197   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3198   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3199   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3200   /* return pointer to global ordering of local dofs */
3201   *global_numbering_subset = temp_global_dofs;
3202   PetscFunctionReturn(0);
3203 }
3204 
3205 #undef __FUNCT__
3206 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3207 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3208 {
3209   PetscInt       i,j;
3210   PetscScalar    *alphas;
3211   PetscErrorCode ierr;
3212 
3213   PetscFunctionBegin;
3214   /* this implements stabilized Gram-Schmidt */
3215   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3216   for (i=0;i<n;i++) {
3217     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3218     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3219     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3220   }
3221   ierr = PetscFree(alphas);CHKERRQ(ierr);
3222   PetscFunctionReturn(0);
3223 }
3224 
3225 #undef __FUNCT__
3226 #define __FUNCT__ "MatISGetSubassemblingPattern"
3227 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3228 {
3229   Mat             subdomain_adj;
3230   IS              new_ranks,ranks_send_to;
3231   MatPartitioning partitioner;
3232   Mat_IS          *matis;
3233   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3234   PetscInt        prank;
3235   PetscMPIInt     size,rank,color;
3236   PetscInt        *xadj,*adjncy,*oldranks;
3237   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3238   PetscInt        i,local_size,threshold=0;
3239   PetscErrorCode  ierr;
3240   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3241   PetscSubcomm    subcomm;
3242 
3243   PetscFunctionBegin;
3244   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3245   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3246   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3247 
3248   /* Get info on mapping */
3249   matis = (Mat_IS*)(mat->data);
3250   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3251   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3252 
3253   /* build local CSR graph of subdomains' connectivity */
3254   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3255   xadj[0] = 0;
3256   xadj[1] = PetscMax(n_neighs-1,0);
3257   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3258   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3259 
3260   if (threshold) {
3261     PetscInt xadj_count = 0;
3262     for (i=1;i<n_neighs;i++) {
3263       if (n_shared[i] > threshold) {
3264         adjncy[xadj_count] = neighs[i];
3265         adjncy_wgt[xadj_count] = n_shared[i];
3266         xadj_count++;
3267       }
3268     }
3269     xadj[1] = xadj_count;
3270   } else {
3271     if (xadj[1]) {
3272       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3273       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3274     }
3275   }
3276   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3277   if (use_square) {
3278     for (i=0;i<xadj[1];i++) {
3279       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3280     }
3281   }
3282   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3283 
3284   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3285 
3286   /*
3287     Restrict work on active processes only.
3288   */
3289   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3290   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3291   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3292   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3293   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3294   if (color) {
3295     ierr = PetscFree(xadj);CHKERRQ(ierr);
3296     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3297     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3298   } else {
3299     PetscInt coarsening_ratio;
3300     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3301     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3302     prank = rank;
3303     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3304     /*
3305     for (i=0;i<size;i++) {
3306       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3307     }
3308     */
3309     for (i=0;i<xadj[1];i++) {
3310       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3311     }
3312     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3313     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3314     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3315 
3316     /* Partition */
3317     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3318     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3319     if (use_vwgt) {
3320       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3321       v_wgt[0] = local_size;
3322       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3323     }
3324     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3325     coarsening_ratio = size/n_subdomains;
3326     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3327     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3328     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3329     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3330 
3331     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3332     if (contiguous) {
3333       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3334     } else {
3335       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3336     }
3337     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3338     /* clean up */
3339     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3340     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3341     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3342     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3343   }
3344   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3345 
3346   /* assemble parallel IS for sends */
3347   i = 1;
3348   if (color) i=0;
3349   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3350 
3351   /* get back IS */
3352   *is_sends = ranks_send_to;
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3357 
3358 #undef __FUNCT__
3359 #define __FUNCT__ "MatISSubassemble"
3360 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3361 {
3362   Mat                    local_mat;
3363   Mat_IS                 *matis;
3364   IS                     is_sends_internal;
3365   PetscInt               rows,cols,new_local_rows;
3366   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3367   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3368   ISLocalToGlobalMapping l2gmap;
3369   PetscInt*              l2gmap_indices;
3370   const PetscInt*        is_indices;
3371   MatType                new_local_type;
3372   /* buffers */
3373   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3374   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3375   PetscInt               *recv_buffer_idxs_local;
3376   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3377   /* MPI */
3378   MPI_Comm               comm,comm_n;
3379   PetscSubcomm           subcomm;
3380   PetscMPIInt            n_sends,n_recvs,commsize;
3381   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3382   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3383   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3384   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3385   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3386   PetscErrorCode         ierr;
3387 
3388   PetscFunctionBegin;
3389   /* TODO: add missing checks */
3390   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3391   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3392   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3393   PetscValidLogicalCollectiveInt(mat,nis,7);
3394   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3395   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3396   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3397   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3398   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3399   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3400   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3401   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3402     PetscInt mrows,mcols,mnrows,mncols;
3403     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3404     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3405     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3406     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3407     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3408     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3409   }
3410   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3411   PetscValidLogicalCollectiveInt(mat,bs,0);
3412   /* prepare IS for sending if not provided */
3413   if (!is_sends) {
3414     PetscBool pcontig = PETSC_TRUE;
3415     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3416     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3417   } else {
3418     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3419     is_sends_internal = is_sends;
3420   }
3421 
3422   /* get pointer of MATIS data */
3423   matis = (Mat_IS*)mat->data;
3424 
3425   /* get comm */
3426   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3427 
3428   /* compute number of sends */
3429   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3430   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3431 
3432   /* compute number of receives */
3433   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3434   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3435   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3436   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3437   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3438   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3439   ierr = PetscFree(iflags);CHKERRQ(ierr);
3440 
3441   /* restrict comm if requested */
3442   subcomm = 0;
3443   destroy_mat = PETSC_FALSE;
3444   if (restrict_comm) {
3445     PetscMPIInt color,subcommsize;
3446 
3447     color = 0;
3448     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3449     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3450     subcommsize = commsize - subcommsize;
3451     /* check if reuse has been requested */
3452     if (reuse == MAT_REUSE_MATRIX) {
3453       if (*mat_n) {
3454         PetscMPIInt subcommsize2;
3455         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3456         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3457         comm_n = PetscObjectComm((PetscObject)*mat_n);
3458       } else {
3459         comm_n = PETSC_COMM_SELF;
3460       }
3461     } else { /* MAT_INITIAL_MATRIX */
3462       PetscMPIInt rank;
3463 
3464       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3465       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3466       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3467       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3468       comm_n = subcomm->comm;
3469     }
3470     /* flag to destroy *mat_n if not significative */
3471     if (color) destroy_mat = PETSC_TRUE;
3472   } else {
3473     comm_n = comm;
3474   }
3475 
3476   /* prepare send/receive buffers */
3477   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3478   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3479   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3480   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3481   if (nis) {
3482     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3483   }
3484 
3485   /* Get data from local matrices */
3486   if (!isdense) {
3487     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3488     /* TODO: See below some guidelines on how to prepare the local buffers */
3489     /*
3490        send_buffer_vals should contain the raw values of the local matrix
3491        send_buffer_idxs should contain:
3492        - MatType_PRIVATE type
3493        - PetscInt        size_of_l2gmap
3494        - PetscInt        global_row_indices[size_of_l2gmap]
3495        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3496     */
3497   } else {
3498     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3499     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3500     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3501     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3502     send_buffer_idxs[1] = i;
3503     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3504     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3505     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3506     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3507     for (i=0;i<n_sends;i++) {
3508       ilengths_vals[is_indices[i]] = len*len;
3509       ilengths_idxs[is_indices[i]] = len+2;
3510     }
3511   }
3512   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3513   /* additional is (if any) */
3514   if (nis) {
3515     PetscMPIInt psum;
3516     PetscInt j;
3517     for (j=0,psum=0;j<nis;j++) {
3518       PetscInt plen;
3519       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3520       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3521       psum += len+1; /* indices + lenght */
3522     }
3523     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3524     for (j=0,psum=0;j<nis;j++) {
3525       PetscInt plen;
3526       const PetscInt *is_array_idxs;
3527       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3528       send_buffer_idxs_is[psum] = plen;
3529       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3530       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3531       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3532       psum += plen+1; /* indices + lenght */
3533     }
3534     for (i=0;i<n_sends;i++) {
3535       ilengths_idxs_is[is_indices[i]] = psum;
3536     }
3537     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3538   }
3539 
3540   buf_size_idxs = 0;
3541   buf_size_vals = 0;
3542   buf_size_idxs_is = 0;
3543   for (i=0;i<n_recvs;i++) {
3544     buf_size_idxs += (PetscInt)olengths_idxs[i];
3545     buf_size_vals += (PetscInt)olengths_vals[i];
3546     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3547   }
3548   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3549   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3550   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3551 
3552   /* get new tags for clean communications */
3553   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3554   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3555   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3556 
3557   /* allocate for requests */
3558   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3559   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3560   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3561   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3562   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3563   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3564 
3565   /* communications */
3566   ptr_idxs = recv_buffer_idxs;
3567   ptr_vals = recv_buffer_vals;
3568   ptr_idxs_is = recv_buffer_idxs_is;
3569   for (i=0;i<n_recvs;i++) {
3570     source_dest = onodes[i];
3571     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3572     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3573     ptr_idxs += olengths_idxs[i];
3574     ptr_vals += olengths_vals[i];
3575     if (nis) {
3576       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3577       ptr_idxs_is += olengths_idxs_is[i];
3578     }
3579   }
3580   for (i=0;i<n_sends;i++) {
3581     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3582     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3583     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3584     if (nis) {
3585       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3586     }
3587   }
3588   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3589   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3590 
3591   /* assemble new l2g map */
3592   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3593   ptr_idxs = recv_buffer_idxs;
3594   new_local_rows = 0;
3595   for (i=0;i<n_recvs;i++) {
3596     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3597     ptr_idxs += olengths_idxs[i];
3598   }
3599   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3600   ptr_idxs = recv_buffer_idxs;
3601   new_local_rows = 0;
3602   for (i=0;i<n_recvs;i++) {
3603     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3604     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3605     ptr_idxs += olengths_idxs[i];
3606   }
3607   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3608   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3609   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3610 
3611   /* infer new local matrix type from received local matrices type */
3612   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3613   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3614   if (n_recvs) {
3615     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3616     ptr_idxs = recv_buffer_idxs;
3617     for (i=0;i<n_recvs;i++) {
3618       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3619         new_local_type_private = MATAIJ_PRIVATE;
3620         break;
3621       }
3622       ptr_idxs += olengths_idxs[i];
3623     }
3624     switch (new_local_type_private) {
3625       case MATDENSE_PRIVATE:
3626         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3627           new_local_type = MATSEQAIJ;
3628           bs = 1;
3629         } else { /* if I receive only 1 dense matrix */
3630           new_local_type = MATSEQDENSE;
3631           bs = 1;
3632         }
3633         break;
3634       case MATAIJ_PRIVATE:
3635         new_local_type = MATSEQAIJ;
3636         bs = 1;
3637         break;
3638       case MATBAIJ_PRIVATE:
3639         new_local_type = MATSEQBAIJ;
3640         break;
3641       case MATSBAIJ_PRIVATE:
3642         new_local_type = MATSEQSBAIJ;
3643         break;
3644       default:
3645         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3646         break;
3647     }
3648   } else { /* by default, new_local_type is seqdense */
3649     new_local_type = MATSEQDENSE;
3650     bs = 1;
3651   }
3652 
3653   /* create MATIS object if needed */
3654   if (reuse == MAT_INITIAL_MATRIX) {
3655     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3656     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3657   } else {
3658     /* it also destroys the local matrices */
3659     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3660   }
3661   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3662   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3663 
3664   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3665 
3666   /* Global to local map of received indices */
3667   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3668   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3669   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3670 
3671   /* restore attributes -> type of incoming data and its size */
3672   buf_size_idxs = 0;
3673   for (i=0;i<n_recvs;i++) {
3674     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3675     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3676     buf_size_idxs += (PetscInt)olengths_idxs[i];
3677   }
3678   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3679 
3680   /* set preallocation */
3681   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3682   if (!newisdense) {
3683     PetscInt *new_local_nnz=0;
3684 
3685     ptr_vals = recv_buffer_vals;
3686     ptr_idxs = recv_buffer_idxs_local;
3687     if (n_recvs) {
3688       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3689     }
3690     for (i=0;i<n_recvs;i++) {
3691       PetscInt j;
3692       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3693         for (j=0;j<*(ptr_idxs+1);j++) {
3694           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3695         }
3696       } else {
3697         /* TODO */
3698       }
3699       ptr_idxs += olengths_idxs[i];
3700     }
3701     if (new_local_nnz) {
3702       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3703       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3704       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3705       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3706       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3707       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3708     } else {
3709       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3710     }
3711     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3712   } else {
3713     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3714   }
3715 
3716   /* set values */
3717   ptr_vals = recv_buffer_vals;
3718   ptr_idxs = recv_buffer_idxs_local;
3719   for (i=0;i<n_recvs;i++) {
3720     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3721       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3722       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3723       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3724       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3725       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3726     } else {
3727       /* TODO */
3728     }
3729     ptr_idxs += olengths_idxs[i];
3730     ptr_vals += olengths_vals[i];
3731   }
3732   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3733   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3734   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3735   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3736   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3737   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3738 
3739 #if 0
3740   if (!restrict_comm) { /* check */
3741     Vec       lvec,rvec;
3742     PetscReal infty_error;
3743 
3744     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3745     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3746     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3747     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3748     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3749     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3750     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3751     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3752     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3753   }
3754 #endif
3755 
3756   /* assemble new additional is (if any) */
3757   if (nis) {
3758     PetscInt **temp_idxs,*count_is,j,psum;
3759 
3760     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3761     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3762     ptr_idxs = recv_buffer_idxs_is;
3763     psum = 0;
3764     for (i=0;i<n_recvs;i++) {
3765       for (j=0;j<nis;j++) {
3766         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3767         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3768         psum += plen;
3769         ptr_idxs += plen+1; /* shift pointer to received data */
3770       }
3771     }
3772     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3773     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3774     for (i=1;i<nis;i++) {
3775       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3776     }
3777     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3778     ptr_idxs = recv_buffer_idxs_is;
3779     for (i=0;i<n_recvs;i++) {
3780       for (j=0;j<nis;j++) {
3781         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3782         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3783         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3784         ptr_idxs += plen+1; /* shift pointer to received data */
3785       }
3786     }
3787     for (i=0;i<nis;i++) {
3788       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3789       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3790       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3791     }
3792     ierr = PetscFree(count_is);CHKERRQ(ierr);
3793     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3794     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3795   }
3796   /* free workspace */
3797   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3798   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3799   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3800   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3801   if (isdense) {
3802     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3803     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3804   } else {
3805     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3806   }
3807   if (nis) {
3808     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3809     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3810   }
3811   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3812   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3813   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3814   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3815   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3816   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3817   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3818   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3819   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3820   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3821   ierr = PetscFree(onodes);CHKERRQ(ierr);
3822   if (nis) {
3823     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3824     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3825     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3826   }
3827   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3828   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3829     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3830     for (i=0;i<nis;i++) {
3831       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3832     }
3833   }
3834   PetscFunctionReturn(0);
3835 }
3836 
3837 /* temporary hack into ksp private data structure */
3838 #include <petsc-private/kspimpl.h>
3839 
3840 #undef __FUNCT__
3841 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3842 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3843 {
3844   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3845   PC_IS                  *pcis = (PC_IS*)pc->data;
3846   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3847   MatNullSpace           CoarseNullSpace=NULL;
3848   ISLocalToGlobalMapping coarse_islg;
3849   IS                     coarse_is,*isarray;
3850   PetscInt               i,im_active=-1,active_procs=-1;
3851   PetscInt               nis,nisdofs,nisneu;
3852   PC                     pc_temp;
3853   PCType                 coarse_pc_type;
3854   KSPType                coarse_ksp_type;
3855   PetscBool              multilevel_requested,multilevel_allowed;
3856   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3857   Mat                    t_coarse_mat_is;
3858   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3859   PetscMPIInt            all_procs;
3860   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3861   PetscBool              compute_vecs = PETSC_FALSE;
3862   PetscScalar            *array;
3863   PetscErrorCode         ierr;
3864 
3865   PetscFunctionBegin;
3866   /* Assign global numbering to coarse dofs */
3867   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3868     compute_vecs = PETSC_TRUE;
3869     PetscInt ocoarse_size;
3870     ocoarse_size = pcbddc->coarse_size;
3871     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3872     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3873     /* see if we can avoid some work */
3874     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3875       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3876         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3877         coarse_reuse = PETSC_FALSE;
3878       } else { /* we can safely reuse already computed coarse matrix */
3879         coarse_reuse = PETSC_TRUE;
3880       }
3881     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3882       coarse_reuse = PETSC_FALSE;
3883     }
3884     /* reset any subassembling information */
3885     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3886     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3887   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3888     coarse_reuse = PETSC_TRUE;
3889   }
3890 
3891   /* count "active" (i.e. with positive local size) and "void" processes */
3892   im_active = !!(pcis->n);
3893   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3894   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3895   void_procs = all_procs-active_procs;
3896   csin_type_simple = PETSC_TRUE;
3897   redist = PETSC_FALSE;
3898   if (pcbddc->current_level && void_procs) {
3899     csin_ml = PETSC_TRUE;
3900     ncoarse_ml = void_procs;
3901     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3902     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3903       csin_ds = PETSC_TRUE;
3904       ncoarse_ds = pcbddc->redistribute_coarse;
3905       redist = PETSC_TRUE;
3906     } else {
3907       csin_ds = PETSC_TRUE;
3908       ncoarse_ds = active_procs;
3909       redist = PETSC_TRUE;
3910     }
3911   } else {
3912     csin_ml = PETSC_FALSE;
3913     ncoarse_ml = all_procs;
3914     if (void_procs) {
3915       csin_ds = PETSC_TRUE;
3916       ncoarse_ds = void_procs;
3917       csin_type_simple = PETSC_FALSE;
3918     } else {
3919       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3920         csin_ds = PETSC_TRUE;
3921         ncoarse_ds = pcbddc->redistribute_coarse;
3922         redist = PETSC_TRUE;
3923       } else {
3924         csin_ds = PETSC_FALSE;
3925         ncoarse_ds = all_procs;
3926       }
3927     }
3928   }
3929 
3930   /*
3931     test if we can go multilevel: three conditions must be satisfied:
3932     - we have not exceeded the number of levels requested
3933     - we can actually subassemble the active processes
3934     - we can find a suitable number of MPI processes where we can place the subassembled problem
3935   */
3936   multilevel_allowed = PETSC_FALSE;
3937   multilevel_requested = PETSC_FALSE;
3938   if (pcbddc->current_level < pcbddc->max_levels) {
3939     multilevel_requested = PETSC_TRUE;
3940     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3941       multilevel_allowed = PETSC_FALSE;
3942     } else {
3943       multilevel_allowed = PETSC_TRUE;
3944     }
3945   }
3946   /* determine number of process partecipating to coarse solver */
3947   if (multilevel_allowed) {
3948     ncoarse = ncoarse_ml;
3949     csin = csin_ml;
3950     redist = PETSC_FALSE;
3951   } else {
3952     ncoarse = ncoarse_ds;
3953     csin = csin_ds;
3954   }
3955 
3956   /* creates temporary l2gmap and IS for coarse indexes */
3957   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3958   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3959 
3960   /* creates temporary MATIS object for coarse matrix */
3961   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3962   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3963   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3964   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3965 #if 0
3966   {
3967     PetscViewer viewer;
3968     char filename[256];
3969     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3970     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3971     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3972     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3973     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3974   }
3975 #endif
3976   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3977   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3978   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3979   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3980   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3981 
3982   /* compute dofs splitting and neumann boundaries for coarse dofs */
3983   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3984     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3985     const PetscInt         *idxs;
3986     ISLocalToGlobalMapping tmap;
3987 
3988     /* create map between primal indices (in local representative ordering) and local primal numbering */
3989     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3990     /* allocate space for temporary storage */
3991     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3992     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3993     /* allocate for IS array */
3994     nisdofs = pcbddc->n_ISForDofsLocal;
3995     nisneu = !!pcbddc->NeumannBoundariesLocal;
3996     nis = nisdofs + nisneu;
3997     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3998     /* dofs splitting */
3999     for (i=0;i<nisdofs;i++) {
4000       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4001       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4002       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4003       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4004       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4005       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4006       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4007       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4008     }
4009     /* neumann boundaries */
4010     if (pcbddc->NeumannBoundariesLocal) {
4011       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4012       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4013       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4014       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4015       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4016       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4017       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4018       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4019     }
4020     /* free memory */
4021     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4022     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4023     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4024   } else {
4025     nis = 0;
4026     nisdofs = 0;
4027     nisneu = 0;
4028     isarray = NULL;
4029   }
4030   /* destroy no longer needed map */
4031   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4032 
4033   /* restrict on coarse candidates (if needed) */
4034   coarse_mat_is = NULL;
4035   if (csin) {
4036     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4037       if (redist) {
4038         PetscMPIInt rank;
4039         PetscInt    spc,n_spc_p1,dest[1],destsize;
4040 
4041         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4042         spc = active_procs/ncoarse;
4043         n_spc_p1 = active_procs%ncoarse;
4044         if (im_active) {
4045           destsize = 1;
4046           if (rank > n_spc_p1*(spc+1)-1) {
4047             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4048           } else {
4049             dest[0] = rank/(spc+1);
4050           }
4051         } else {
4052           destsize = 0;
4053         }
4054         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4055       } else if (csin_type_simple) {
4056         PetscMPIInt rank;
4057         PetscInt    issize,isidx;
4058 
4059         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4060         if (im_active) {
4061           issize = 1;
4062           isidx = (PetscInt)rank;
4063         } else {
4064           issize = 0;
4065           isidx = -1;
4066         }
4067         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4068       } else { /* get a suitable subassembling pattern from MATIS code */
4069         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4070       }
4071 
4072       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4073       if (!redist || ncoarse <= void_procs) {
4074         PetscInt ncoarse_cand,tissize,*nisindices;
4075         PetscInt *coarse_candidates;
4076         const PetscInt* tisindices;
4077 
4078         /* get coarse candidates' ranks in pc communicator */
4079         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4080         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4081         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4082           if (!coarse_candidates[i]) {
4083             coarse_candidates[ncoarse_cand++]=i;
4084           }
4085         }
4086         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4087 
4088 
4089         if (pcbddc->dbg_flag) {
4090           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4091           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4092           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4093           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4094           for (i=0;i<ncoarse_cand;i++) {
4095             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4096           }
4097           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4098           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4099         }
4100         /* shift the pattern on coarse candidates */
4101         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4102         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4103         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4104         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4105         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4106         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4107         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4108       }
4109       if (pcbddc->dbg_flag) {
4110         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4111         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4112         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4113         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4114       }
4115     }
4116     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4117     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4118   } else {
4119     if (pcbddc->dbg_flag) {
4120       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4121       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4122       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4123     }
4124     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4125     coarse_mat_is = t_coarse_mat_is;
4126   }
4127 
4128   /* create local to global scatters for coarse problem */
4129   if (compute_vecs) {
4130     PetscInt lrows;
4131     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4132     if (coarse_mat_is) {
4133       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4134     } else {
4135       lrows = 0;
4136     }
4137     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4138     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4139     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4140     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4141     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4142   }
4143   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4144   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4145 
4146   /* set defaults for coarse KSP and PC */
4147   if (multilevel_allowed) {
4148     coarse_ksp_type = KSPRICHARDSON;
4149     coarse_pc_type = PCBDDC;
4150   } else {
4151     coarse_ksp_type = KSPPREONLY;
4152     coarse_pc_type = PCREDUNDANT;
4153   }
4154 
4155   /* print some info if requested */
4156   if (pcbddc->dbg_flag) {
4157     if (!multilevel_allowed) {
4158       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4159       if (multilevel_requested) {
4160         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4161       } else if (pcbddc->max_levels) {
4162         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4163       }
4164       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4165     }
4166   }
4167 
4168   /* create the coarse KSP object only once with defaults */
4169   if (coarse_mat_is) {
4170     MatReuse coarse_mat_reuse;
4171     PetscViewer dbg_viewer = NULL;
4172     if (pcbddc->dbg_flag) {
4173       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4174       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4175     }
4176     if (!pcbddc->coarse_ksp) {
4177       char prefix[256],str_level[16];
4178       size_t len;
4179       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4180       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4181       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4182       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4183       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4184       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4185       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4186       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4187       /* prefix */
4188       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4189       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4190       if (!pcbddc->current_level) {
4191         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4192         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4193       } else {
4194         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4195         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4196         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4197         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4198         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4199         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4200       }
4201       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4202       /* allow user customization */
4203       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4204       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4205     }
4206 
4207     /* get some info after set from options */
4208     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4209     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4210     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4211     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4212     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4213       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4214       isbddc = PETSC_FALSE;
4215     }
4216     if (isredundant) {
4217       KSP inner_ksp;
4218       PC inner_pc;
4219       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4220       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4221       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4222     }
4223 
4224     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4225     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4226     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4227     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4228     if (nisdofs) {
4229       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4230       for (i=0;i<nisdofs;i++) {
4231         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4232       }
4233     }
4234     if (nisneu) {
4235       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4236       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4237     }
4238 
4239     /* assemble coarse matrix */
4240     if (coarse_reuse) {
4241       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4242       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4243       coarse_mat_reuse = MAT_REUSE_MATRIX;
4244     } else {
4245       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4246     }
4247     if (isbddc || isnn) {
4248       if (pcbddc->coarsening_ratio > 1) {
4249         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4250           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4251           if (pcbddc->dbg_flag) {
4252             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4253             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4254             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4255             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4256           }
4257         }
4258         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4259       } else {
4260         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4261         coarse_mat = coarse_mat_is;
4262       }
4263     } else {
4264       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4265     }
4266     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4267 
4268     /* propagate symmetry info to coarse matrix */
4269     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4270     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4271 
4272     /* set operators */
4273     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4274     if (pcbddc->dbg_flag) {
4275       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4276     }
4277   } else { /* processes non partecipating to coarse solver (if any) */
4278     coarse_mat = 0;
4279   }
4280   ierr = PetscFree(isarray);CHKERRQ(ierr);
4281 #if 0
4282   {
4283     PetscViewer viewer;
4284     char filename[256];
4285     sprintf(filename,"coarse_mat.m");
4286     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4287     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4288     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4289     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4290   }
4291 #endif
4292 
4293   /* Compute coarse null space (special handling by BDDC only) */
4294   if (pcbddc->NullSpace) {
4295     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4296   }
4297 
4298   if (pcbddc->coarse_ksp) {
4299     Vec crhs,csol;
4300     PetscBool ispreonly;
4301     if (CoarseNullSpace) {
4302       if (isbddc) {
4303         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4304       } else {
4305         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4306       }
4307     }
4308     /* setup coarse ksp */
4309     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4310     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4311     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4312     /* hack */
4313     if (!csol) {
4314       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4315     }
4316     if (!crhs) {
4317       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4318     }
4319     /* Check coarse problem if in debug mode or if solving with an iterative method */
4320     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4321     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4322       KSP       check_ksp;
4323       KSPType   check_ksp_type;
4324       PC        check_pc;
4325       Vec       check_vec,coarse_vec;
4326       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4327       PetscInt  its;
4328       PetscBool compute_eigs;
4329       PetscReal *eigs_r,*eigs_c;
4330       PetscInt  neigs;
4331       const char *prefix;
4332 
4333       /* Create ksp object suitable for estimation of extreme eigenvalues */
4334       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4335       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4336       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4337       if (ispreonly) {
4338         check_ksp_type = KSPPREONLY;
4339         compute_eigs = PETSC_FALSE;
4340       } else {
4341         check_ksp_type = KSPGMRES;
4342         compute_eigs = PETSC_TRUE;
4343       }
4344       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4345       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4346       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4347       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4348       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4349       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4350       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4351       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4352       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4353       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4354       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4355       /* create random vec */
4356       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4357       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4358       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4359       if (CoarseNullSpace) {
4360         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4361       }
4362       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4363       /* solve coarse problem */
4364       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4365       if (CoarseNullSpace) {
4366         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4367       }
4368       /* set eigenvalue estimation if preonly has not been requested */
4369       if (compute_eigs) {
4370         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4371         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4372         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4373         lambda_max = eigs_r[neigs-1];
4374         lambda_min = eigs_r[0];
4375         if (pcbddc->use_coarse_estimates) {
4376           if (lambda_max>lambda_min) {
4377             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4378             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4379           }
4380         }
4381       }
4382 
4383       /* check coarse problem residual error */
4384       if (pcbddc->dbg_flag) {
4385         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4386         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4387         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4388         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4389         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4390         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4391         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4392         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4393         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4394         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4395         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4396         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4397         if (compute_eigs) {
4398           PetscReal lambda_max_s,lambda_min_s;
4399           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4400           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4401           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4402           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4403           for (i=0;i<neigs;i++) {
4404             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4405           }
4406         }
4407         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4408         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4409       }
4410       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4411       if (compute_eigs) {
4412         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4413         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4414       }
4415     }
4416   }
4417   /* print additional info */
4418   if (pcbddc->dbg_flag) {
4419     /* waits until all processes reaches this point */
4420     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4421     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4422     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4423   }
4424 
4425   /* free memory */
4426   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4427   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 #undef __FUNCT__
4432 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4433 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4434 {
4435   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4436   PC_IS*         pcis = (PC_IS*)pc->data;
4437   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4438   PetscInt       i,coarse_size;
4439   PetscInt       *local_primal_indices;
4440   PetscErrorCode ierr;
4441 
4442   PetscFunctionBegin;
4443   /* Compute global number of coarse dofs */
4444   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4445     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4446   }
4447   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4448 
4449   /* check numbering */
4450   if (pcbddc->dbg_flag) {
4451     PetscScalar coarsesum,*array;
4452     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4453 
4454     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4455     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4456     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4457     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4458     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4459     for (i=0;i<pcbddc->local_primal_size;i++) {
4460       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4461     }
4462     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4463     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4464     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4465     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4466     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4467     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4468     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4469     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4470     for (i=0;i<pcis->n;i++) {
4471       if (array[i] == 1.0) {
4472         set_error = PETSC_TRUE;
4473         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4474       }
4475     }
4476     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4477     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4478     for (i=0;i<pcis->n;i++) {
4479       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4480     }
4481     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4482     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4483     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4484     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4485     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4486     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4487     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4488       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4489       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4490       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4491       for (i=0;i<pcbddc->local_primal_size;i++) {
4492         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4493       }
4494       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4495     }
4496     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4497     if (set_error_reduced) {
4498       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4499     }
4500   }
4501   /* get back data */
4502   *coarse_size_n = coarse_size;
4503   *local_primal_indices_n = local_primal_indices;
4504   PetscFunctionReturn(0);
4505 }
4506 
4507 #undef __FUNCT__
4508 #define __FUNCT__ "PCBDDCGlobalToLocal"
4509 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4510 {
4511   IS             localis_t;
4512   PetscInt       i,lsize,*idxs,n;
4513   PetscScalar    *vals;
4514   PetscErrorCode ierr;
4515 
4516   PetscFunctionBegin;
4517   /* get indices in local ordering exploiting local to global map */
4518   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4519   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4520   for (i=0;i<lsize;i++) vals[i] = 1.0;
4521   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4522   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4523   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4524   if (idxs) { /* multilevel guard */
4525     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4526   }
4527   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4528   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4529   ierr = PetscFree(vals);CHKERRQ(ierr);
4530   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4531   /* now compute set in local ordering */
4532   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4533   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4534   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4535   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4536   for (i=0,lsize=0;i<n;i++) {
4537     if (PetscRealPart(vals[i]) > 0.5) {
4538       lsize++;
4539     }
4540   }
4541   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4542   for (i=0,lsize=0;i<n;i++) {
4543     if (PetscRealPart(vals[i]) > 0.5) {
4544       idxs[lsize++] = i;
4545     }
4546   }
4547   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4548   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4549   *localis = localis_t;
4550   PetscFunctionReturn(0);
4551 }
4552 
4553 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4554 #undef __FUNCT__
4555 #define __FUNCT__ "PCBDDCMatMult_Private"
4556 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4557 {
4558   PCBDDCChange_ctx change_ctx;
4559   PetscErrorCode   ierr;
4560 
4561   PetscFunctionBegin;
4562   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4563   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4564   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4565   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4566   PetscFunctionReturn(0);
4567 }
4568 
4569 #undef __FUNCT__
4570 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4571 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4572 {
4573   PCBDDCChange_ctx change_ctx;
4574   PetscErrorCode   ierr;
4575 
4576   PetscFunctionBegin;
4577   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4578   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4579   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4580   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4581   PetscFunctionReturn(0);
4582 }
4583 
4584 #undef __FUNCT__
4585 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4586 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4587 {
4588   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4589   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4590   PetscInt            *used_xadj,*used_adjncy;
4591   PetscBool           free_used_adj;
4592   PetscErrorCode      ierr;
4593 
4594   PetscFunctionBegin;
4595   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4596   free_used_adj = PETSC_FALSE;
4597   if (pcbddc->sub_schurs_layers == -1) {
4598     used_xadj = NULL;
4599     used_adjncy = NULL;
4600   } else {
4601     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4602       used_xadj = pcbddc->mat_graph->xadj;
4603       used_adjncy = pcbddc->mat_graph->adjncy;
4604     } else if (pcbddc->computed_rowadj) {
4605       used_xadj = pcbddc->mat_graph->xadj;
4606       used_adjncy = pcbddc->mat_graph->adjncy;
4607     } else {
4608       PetscBool      flg_row=PETSC_FALSE;
4609       const PetscInt *xadj,*adjncy;
4610       PetscInt       nvtxs;
4611 
4612       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4613       if (flg_row) {
4614         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4615         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4616         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4617         free_used_adj = PETSC_TRUE;
4618       } else {
4619         pcbddc->sub_schurs_layers = -1;
4620         used_xadj = NULL;
4621         used_adjncy = NULL;
4622       }
4623       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4624     }
4625   }
4626   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4627 
4628   /* free adjacency */
4629   if (free_used_adj) {
4630     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4631   }
4632   PetscFunctionReturn(0);
4633 }
4634 
4635 #undef __FUNCT__
4636 #define __FUNCT__ "PCBDDCInitSubSchurs"
4637 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4638 {
4639   PC_IS               *pcis=(PC_IS*)pc->data;
4640   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4641   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4642   PCBDDCGraph         graph;
4643   Mat                 S_j;
4644   PetscErrorCode      ierr;
4645 
4646   PetscFunctionBegin;
4647   /* attach interface graph for determining subsets */
4648   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4649     IS verticesIS;
4650 
4651     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4652     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4653     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4654     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4655     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4656     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4657 /*
4658     if (pcbddc->dbg_flag) {
4659       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4660     }
4661 */
4662   } else {
4663     graph = pcbddc->mat_graph;
4664   }
4665 
4666   /* Create Schur complement matrix */
4667   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4668   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4669 
4670   /* sub_schurs init */
4671   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4672   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4673   /* free graph struct */
4674   if (pcbddc->sub_schurs_rebuild) {
4675     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4676   }
4677   PetscFunctionReturn(0);
4678 }
4679