xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 4e7385fac521a32d019ba1b91cac4ccec6d80411)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31   if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
32     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
33   }
34 
35   /* max size of subsets */
36   mss = 0;
37   for (i=0;i<sub_schurs->n_subs;i++) {
38     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
39       PetscInt subset_size;
40       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
41       mss = PetscMax(mss,subset_size);
42     }
43   }
44 
45   /* min/max and threshold */
46   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
47   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
48   nmax = PetscMax(nmin,nmax);
49   if (pcbddc->adaptive_threshold > 1.0) {
50     thresh = 1.0/pcbddc->adaptive_threshold;
51   } else {
52     thresh = 1.0;
53   }
54 
55   /* allocate lapack workspace */
56   cum = cum2 = 0;
57   maxneigs = 0;
58   for (i=0;i<sub_schurs->n_subs;i++) {
59     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
60       PetscInt n,subset_size;
61       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
62       n = PetscMin(subset_size,nmax);
63       cum += subset_size*n;
64       cum2 += n;
65       maxneigs = PetscMax(maxneigs,n);
66     }
67   }
68 
69   if (mss) {
70     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
71       PetscBLASInt B_itype = 1;
72       PetscBLASInt B_N = mss;
73       PetscReal    zero = 0.0;
74       PetscReal    eps = 0.0; /* dlamch? */
75 
76       B_lwork = -1;
77       S = NULL;
78       St = NULL;
79       eigs = NULL;
80       eigv = NULL;
81       B_iwork = NULL;
82       B_ifail = NULL;
83       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
84 #if defined(PETSC_USE_COMPLEX)
85       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
86 #else
87       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
88 #endif
89       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
90       ierr = PetscFPTrapPop();CHKERRQ(ierr);
91     } else {
92         /* TODO */
93     }
94   } else {
95     lwork = 0;
96   }
97 
98   nv = 0;
99   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
100     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
101   }
102   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
103   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
104                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
105 #if defined(PETSC_USE_COMPLEX)
106   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
107 #endif
108   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
109   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
110                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
111                       nv+cum,&pcbddc->adaptive_constraints_idxs,
112                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
113   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
114 
115   maxneigs = 0;
116   cum = cum2 = cumarray = 0;
117   if (sub_schurs->is_Ej_com) {
118     const PetscInt *idxs;
119 
120     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
121     for (cum=0;cum<nv;cum++) {
122       pcbddc->adaptive_constraints_n[cum] = 1;
123       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
124       pcbddc->adaptive_constraints_ptrs[cum] = cum;
125       pcbddc->adaptive_constraints_data[cum] = 1.0;
126     }
127     cum2 = cum;
128     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
129   }
130 
131   if (mss) { /* multilevel */
132     if (pcbddc->use_deluxe_scaling) {
133       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
134     } else {
135       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
136     }
137     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
138   }
139 
140   for (i=0;i<sub_schurs->n_subs;i++) {
141     PetscInt j,subset_size;
142 
143     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
144     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
145       const PetscInt *idxs;
146       PetscScalar    one = 1.0,scalar_zero = 0.0;
147       PetscReal      zero=0.0;
148       PetscBLASInt   B_N;
149 
150       /* S should be copied since we need it for deluxe scaling */
151       if (sub_schurs->is_hermitian) {
152         PetscInt j;
153         for (j=0;j<subset_size;j++) {
154           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
155         }
156         for (j=0;j<subset_size;j++) {
157           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
158         }
159       } else {
160         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
161         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
162       }
163       /* is always this the right matrix? */
164       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
165 
166       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
167       /* St = Starray+cumarray; */
168 
169       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
170         PetscBLASInt B_itype = 1;
171         PetscBLASInt B_IL = 1, B_IU;
172         PetscReal    eps = -1.0; /* dlamch? */
173         PetscInt     nmin_s;
174 
175         /* ask for eigenvalues lower than thresh */
176         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
177         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
178         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
179 #if defined(PETSC_USE_COMPLEX)
180         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
181 #else
182         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
183 #endif
184         ierr = PetscFPTrapPop();CHKERRQ(ierr);
185         if (B_ierr) {
186           if (B_ierr < 0 ) {
187             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
188           } else if (B_ierr <= B_N) {
189             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
190           } else {
191             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
192           }
193         }
194 
195         if (B_neigs > nmax) {
196           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax);
197           B_neigs = nmax;
198         }
199 
200         nmin_s = PetscMin(nmin,B_N);
201         if (B_neigs < nmin_s) {
202           PetscBLASInt B_neigs2;
203 
204           B_IL = B_neigs + 1;
205           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
206           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
207           if (sub_schurs->is_hermitian) {
208             PetscInt j;
209             for (j=0;j<subset_size;j++) {
210               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
211             }
212             for (j=0;j<subset_size;j++) {
213               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
214             }
215           } else {
216             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
217             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
218           }
219           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
220 #if defined(PETSC_USE_COMPLEX)
221           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
222 #else
223           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
224 #endif
225           ierr = PetscFPTrapPop();CHKERRQ(ierr);
226           B_neigs += B_neigs2;
227         }
228         if (B_ierr) {
229           if (B_ierr < 0 ) {
230             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
231           } else if (B_ierr <= B_N) {
232             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
233           } else {
234             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
235           }
236         }
237         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
238         for (j=0;j<B_neigs;j++) {
239           if (eigs[j] == 0.0) {
240             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
241           } else {
242             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
243           }
244         }
245       } else {
246           /* TODO */
247       }
248       maxneigs = PetscMax(B_neigs,maxneigs);
249       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
250 
251       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
252       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N));
253       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
254 #if 0
255       PetscInt ii;
256       for (ii=0;ii<B_neigs;ii++) {
257         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N);
258         for (j=0;j<B_N;j++) {
259           PetscPrintf(PETSC_COMM_SELF,"[%d]       %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]);
260         }
261       }
262 #endif
263       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
264       for (j=0;j<B_neigs;j++) {
265 #if 0
266         {
267           PetscBLASInt Blas_N,Blas_one = 1.0;
268           PetscScalar norm;
269           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
270           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
271           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
272             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
273           } else {
274             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
275           }
276           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
277         }
278 #endif
279         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
280         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
281         cum2 += subset_size;
282       }
283       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
284     }
285     /* shift for next computation */
286     cumarray += subset_size*subset_size;
287   }
288   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
289   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
290 
291   if (mss) {
292     if (pcbddc->use_deluxe_scaling) {
293       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
294     } else {
295       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
296     }
297     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
298   }
299   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
300 #if defined(PETSC_USE_COMPLEX)
301   ierr = PetscFree(rwork);CHKERRQ(ierr);
302 #endif
303   if (pcbddc->dbg_flag) {
304     PetscInt maxneigs_r;
305     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
306     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
307   }
308   PetscFunctionReturn(0);
309 }
310 
311 #undef __FUNCT__
312 #define __FUNCT__ "PCBDDCSetUpSolvers"
313 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
314 {
315   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
316   PetscScalar    *coarse_submat_vals;
317   PetscErrorCode ierr;
318 
319   PetscFunctionBegin;
320   /* Setup local scatters R_to_B and (optionally) R_to_D */
321   /* PCBDDCSetUpLocalWorkVectors should be called first! */
322   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
323 
324   /* Setup local neumann solver ksp_R */
325   /* PCBDDCSetUpLocalScatters should be called first! */
326   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
327 
328   /* Change global null space passed in by the user if change of basis has been requested */
329   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
330     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
331   }
332 
333   /*
334      Setup local correction and local part of coarse basis.
335      Gives back the dense local part of the coarse matrix in column major ordering
336   */
337   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
338 
339   /* Compute total number of coarse nodes and setup coarse solver */
340   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
341 
342   /* free */
343   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
344   PetscFunctionReturn(0);
345 }
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "PCBDDCResetCustomization"
349 PetscErrorCode PCBDDCResetCustomization(PC pc)
350 {
351   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
352   PetscErrorCode ierr;
353 
354   PetscFunctionBegin;
355   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
356   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
357   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
358   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
359   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
360   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
361   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
362   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
363   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
364   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
365   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
366   PetscFunctionReturn(0);
367 }
368 
369 #undef __FUNCT__
370 #define __FUNCT__ "PCBDDCResetTopography"
371 PetscErrorCode PCBDDCResetTopography(PC pc)
372 {
373   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
374   PetscErrorCode ierr;
375 
376   PetscFunctionBegin;
377   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
378   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
379   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
380   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
381   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
382   PetscFunctionReturn(0);
383 }
384 
385 #undef __FUNCT__
386 #define __FUNCT__ "PCBDDCResetSolvers"
387 PetscErrorCode PCBDDCResetSolvers(PC pc)
388 {
389   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
390   PetscScalar    *array;
391   PetscErrorCode ierr;
392 
393   PetscFunctionBegin;
394   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
395   if (pcbddc->coarse_phi_B) {
396     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
397     ierr = PetscFree(array);CHKERRQ(ierr);
398   }
399   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
400   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
403   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
404   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
405   if (pcbddc->local_auxmat2) {
406     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
407     ierr = PetscFree(array);CHKERRQ(ierr);
408   }
409   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
410   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
411   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
412   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
413   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
414   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
415   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
416   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
417   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
418   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
419   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
420   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
421   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
422   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
423   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
424   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
431 {
432   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
433   PC_IS          *pcis = (PC_IS*)pc->data;
434   VecType        impVecType;
435   PetscInt       n_constraints,n_R,old_size;
436   PetscErrorCode ierr;
437 
438   PetscFunctionBegin;
439   if (!pcbddc->ConstraintMatrix) {
440     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
441   }
442   /* get sizes */
443   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
444   n_R = pcis->n-pcbddc->n_actual_vertices;
445   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
446   /* local work vectors (try to avoid unneeded work)*/
447   /* R nodes */
448   old_size = -1;
449   if (pcbddc->vec1_R) {
450     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
451   }
452   if (n_R != old_size) {
453     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
454     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
455     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
456     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
457     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
458     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
459   }
460   /* local primal dofs */
461   old_size = -1;
462   if (pcbddc->vec1_P) {
463     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
464   }
465   if (pcbddc->local_primal_size != old_size) {
466     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
467     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
468     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
469     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
470   }
471   /* local explicit constraints */
472   old_size = -1;
473   if (pcbddc->vec1_C) {
474     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
475   }
476   if (n_constraints && n_constraints != old_size) {
477     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
481   }
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "PCBDDCSetUpCorrection"
487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
488 {
489   PetscErrorCode         ierr;
490   /* pointers to pcis and pcbddc */
491   PC_IS*                 pcis = (PC_IS*)pc->data;
492   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
493   /* submatrices of local problem */
494   Mat                    A_RV,A_VR,A_VV;
495   /* submatrices of local coarse problem */
496   Mat                    S_VV,S_CV,S_VC,S_CC;
497   /* working matrices */
498   Mat                    C_CR;
499   /* additional working stuff */
500   PC                     pc_R;
501   Mat                    F;
502   PetscBool              isLU,isCHOL,isILU;
503 
504   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
505   PetscScalar            *work;
506   PetscInt               *idx_V_B;
507   PetscInt               n,n_vertices,n_constraints;
508   PetscInt               i,n_R,n_D,n_B;
509   PetscBool              unsymmetric_check;
510   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
511   MatType                impMatType;
512   /* some shortcuts to scalars */
513   PetscScalar            one=1.0,m_one=-1.0;
514 
515   PetscFunctionBegin;
516   /* get number of vertices (corners plus constraints with change of basis)
517      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
518   n_vertices = pcbddc->n_actual_vertices;
519   n_constraints = pcbddc->local_primal_size-n_vertices;
520   /* Set Non-overlapping dimensions */
521   n_B = pcis->n_B; n_D = pcis->n - n_B;
522   n_R = pcis->n-n_vertices;
523 
524   /* Set types for local objects needed by BDDC precondtioner */
525   impMatType = MATSEQDENSE;
526 
527   /* vertices in boundary numbering */
528   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
529   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
530   if (i != n_vertices) {
531     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
532   }
533 
534   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
535   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
536   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
537   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
538   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
539   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
540   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
541   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
542   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
543   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
544 
545   unsymmetric_check = PETSC_FALSE;
546   /* allocate workspace */
547   n = 0;
548   if (n_constraints) {
549     n += n_R*n_constraints;
550   }
551   if (n_vertices) {
552     n = PetscMax(2*n_R*n_vertices,n);
553   }
554   if (!pcbddc->issym) {
555     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
556     unsymmetric_check = PETSC_TRUE;
557   }
558   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
559 
560   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
561   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
562   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
563   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
564   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
565   if (isLU || isILU || isCHOL) {
566     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
567   } else {
568     F = NULL;
569   }
570 
571   /* Precompute stuffs needed for preprocessing and application of BDDC*/
572   if (n_constraints) {
573     Mat M1,M2,M3;
574     IS  is_aux;
575     /* see if we can save some allocations */
576     if (pcbddc->local_auxmat2) {
577       PetscInt on_R,on_constraints;
578       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
579       if (on_R != n_R || on_constraints != n_constraints) {
580         PetscScalar *marray;
581 
582         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
583         ierr = PetscFree(marray);CHKERRQ(ierr);
584         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
585         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
586       }
587     }
588     /* auxiliary matrices */
589     if (!pcbddc->local_auxmat2) {
590       PetscScalar *marray;
591 
592       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
593       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
594       marray += n_R*n_constraints;
595       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
596     }
597 
598     /* Extract constraints on R nodes: C_{CR}  */
599     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
600     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
601     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
602 
603     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
604     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
605     for (i=0;i<n_constraints;i++) {
606       const PetscScalar *row_cmat_values;
607       const PetscInt    *row_cmat_indices;
608       PetscInt          size_of_constraint,j;
609 
610       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
611       for (j=0;j<size_of_constraint;j++) {
612         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
613       }
614       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
615     }
616     if (F) {
617       Mat B;
618 
619       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
620       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
621       ierr = MatDestroy(&B);CHKERRQ(ierr);
622     } else {
623       PetscScalar *xarray;
624       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
625       for (i=0;i<n_constraints;i++) {
626         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
627         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
628         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
629         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
630         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
631       }
632       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
633     }
634 
635     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
636     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
637     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
638     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
639     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
640     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
641     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
642     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
643     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
644     ierr = MatDestroy(&M2);CHKERRQ(ierr);
645     ierr = MatDestroy(&M3);CHKERRQ(ierr);
646     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
647     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
648     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
649     ierr = MatDestroy(&M1);CHKERRQ(ierr);
650   }
651   /* Get submatrices from subdomain matrix */
652   if (n_vertices) {
653     Mat       newmat;
654     IS        is_aux;
655     PetscInt  ibs,mbs;
656     PetscBool issbaij;
657 
658     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
659     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
660     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
661     if (ibs != mbs) { /* need to convert to SEQAIJ */
662       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
663       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
664       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
665       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
666       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
667     } else {
668       /* this is safe */
669       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
670       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
671       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
672         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
673         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
674         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
675         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
676         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
677       } else {
678         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
679         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
680       }
681     }
682     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
683   }
684 
685   /* Matrix of coarse basis functions (local) */
686   if (pcbddc->coarse_phi_B) {
687     PetscInt on_B,on_primal,on_D=n_D;
688     if (pcbddc->coarse_phi_D) {
689       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
690     }
691     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
692     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
693       PetscScalar *marray;
694 
695       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
696       ierr = PetscFree(marray);CHKERRQ(ierr);
697       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
698       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
699       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
700       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
701     }
702   }
703 
704   if (!pcbddc->coarse_phi_B) {
705     PetscScalar *marray;
706 
707     n = n_B*pcbddc->local_primal_size;
708     if (pcbddc->switch_static || pcbddc->dbg_flag) {
709       n += n_D*pcbddc->local_primal_size;
710     }
711     if (!pcbddc->issym) {
712       n *= 2;
713     }
714     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
715     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
716     n = n_B*pcbddc->local_primal_size;
717     if (pcbddc->switch_static || pcbddc->dbg_flag) {
718       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
719       n += n_D*pcbddc->local_primal_size;
720     }
721     if (!pcbddc->issym) {
722       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
723       if (pcbddc->switch_static || pcbddc->dbg_flag) {
724         n = n_B*pcbddc->local_primal_size;
725         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
726       }
727     } else {
728       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
729       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
730       if (pcbddc->switch_static || pcbddc->dbg_flag) {
731         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
732         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
733       }
734     }
735   }
736   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
737   /* vertices */
738   if (n_vertices) {
739 
740     if (n_R) {
741       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
742       PetscBLASInt B_N,B_one = 1;
743       PetscScalar  *x,*y;
744 
745       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
746       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
747       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
748       if (F) {
749         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
750       } else {
751         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
752         for (i=0;i<n_vertices;i++) {
753           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
754           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
755           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
756           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
757           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
758         }
759         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
760       }
761       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
762       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
763       if (n_constraints) {
764         Mat B;
765         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
766         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
767         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
768         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
769         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
770         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
771         ierr = MatDestroy(&B);CHKERRQ(ierr);
772       }
773       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
774       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
775       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
776       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
777       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
778       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
779       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
780       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
781       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
782       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
783       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
784       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
785     } else {
786       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
787       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
788     }
789     /* coarse basis functions */
790     for (i=0;i<n_vertices;i++) {
791       PetscScalar *y;
792 
793       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
794       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
795       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
796       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
797       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
798       y[n_B*i+idx_V_B[i]] = 1.0;
799       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
800       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
801 
802       if (pcbddc->switch_static || pcbddc->dbg_flag) {
803         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
804         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
805         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
806         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
807         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
808         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
809       }
810       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
811     }
812     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
813     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
814   }
815 
816   if (n_constraints) {
817     Mat B;
818 
819     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
820     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
821     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
822     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
823     if (n_vertices) {
824       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
825     }
826     ierr = MatDestroy(&B);CHKERRQ(ierr);
827     /* coarse basis functions */
828     for (i=0;i<n_constraints;i++) {
829       PetscScalar *y;
830 
831       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
832       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
833       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
834       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
836       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
837       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
838 
839       if (pcbddc->switch_static || pcbddc->dbg_flag) {
840         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
841         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
842         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
843         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
844         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
845         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
846       }
847       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
848     }
849   }
850 
851   /* compute other basis functions for non-symmetric problems */
852   if (!pcbddc->issym) {
853     Mat B,X;
854 
855     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
856 
857     if (n_constraints) {
858       Mat S_CCT,B_C;
859 
860       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
861       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
862       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
863       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
864       if (n_vertices) {
865         Mat B_V,S_VCT;
866 
867         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
868         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
869         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
870         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
871         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
872       }
873       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
874     }
875     if (n_vertices && n_R) {
876       Mat          A_VRT;
877       PetscBLASInt B_N,B_one = 1;
878 
879       if (!n_constraints) { /* if there are no constraints, reset work */
880         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
881       }
882       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
883       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
884       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
885       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
886       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
887     }
888 
889     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
890     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
891       for (i=0;i<pcbddc->local_primal_size;i++) {
892         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
893         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
894         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
895         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
896         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
897       }
898     } else {
899       for (i=0;i<pcbddc->local_primal_size;i++) {
900         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
901         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
902         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
903         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
904         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
905       }
906     }
907     ierr = MatDestroy(&B);CHKERRQ(ierr);
908     /* coarse basis functions */
909     for (i=0;i<pcbddc->local_primal_size;i++) {
910       PetscScalar *y;
911 
912       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
913       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
914       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
915       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917       if (i<n_vertices) {
918         y[n_B*i+idx_V_B[i]] = 1.0;
919       }
920       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
921       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
922 
923       if (pcbddc->switch_static || pcbddc->dbg_flag) {
924         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
925         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
926         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
927         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
929         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
930       }
931       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
932     }
933     ierr = MatDestroy(&X);CHKERRQ(ierr);
934   }
935 
936   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
937   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
938   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
939   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
940   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
941   /* Checking coarse_sub_mat and coarse basis functios */
942   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
943   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
944   if (pcbddc->dbg_flag) {
945     Mat         coarse_sub_mat;
946     Mat         AUXMAT,TM1,TM2,TM3,TM4;
947     Mat         coarse_phi_D,coarse_phi_B;
948     Mat         coarse_psi_D,coarse_psi_B;
949     Mat         A_II,A_BB,A_IB,A_BI;
950     MatType     checkmattype=MATSEQAIJ;
951     PetscReal   real_value;
952 
953     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
954     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
955     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
956     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
957     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
958     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
959     if (unsymmetric_check) {
960       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
961       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
962     }
963     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
964 
965     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
966     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
967     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
968     if (unsymmetric_check) {
969       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
970       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
971       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
972       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
973       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
974       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
975       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
976       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
977       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
978       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
979       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
980       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
981     } else {
982       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
983       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
984       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
985       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
986       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
987       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
988       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
989       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
990     }
991     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
992     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
993     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
994     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
995     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
996     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
997 
998     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
999     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1000     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1001     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1002     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1003     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1004     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1005     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1006     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1007     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1008     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1009     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1010     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1011     if (unsymmetric_check) {
1012       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1013       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1014     }
1015     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1016   }
1017 
1018   /* free memory */
1019   ierr = PetscFree(work);CHKERRQ(ierr);
1020   if (n_vertices) {
1021     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1022   }
1023   if (n_constraints) {
1024     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1025   }
1026   /* get back data */
1027   *coarse_submat_vals_n = coarse_submat_vals;
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1033 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1034 {
1035   Mat            *work_mat;
1036   IS             isrow_s,iscol_s;
1037   PetscBool      rsorted,csorted;
1038   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1039   PetscErrorCode ierr;
1040 
1041   PetscFunctionBegin;
1042   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1043   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1044   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1045   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1046 
1047   if (!rsorted) {
1048     const PetscInt *idxs;
1049     PetscInt *idxs_sorted,i;
1050 
1051     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1052     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1053     for (i=0;i<rsize;i++) {
1054       idxs_perm_r[i] = i;
1055     }
1056     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1057     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1058     for (i=0;i<rsize;i++) {
1059       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1060     }
1061     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1062     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1063   } else {
1064     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1065     isrow_s = isrow;
1066   }
1067 
1068   if (!csorted) {
1069     if (isrow == iscol) {
1070       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1071       iscol_s = isrow_s;
1072     } else {
1073       const PetscInt *idxs;
1074       PetscInt *idxs_sorted,i;
1075 
1076       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1077       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1078       for (i=0;i<csize;i++) {
1079         idxs_perm_c[i] = i;
1080       }
1081       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1082       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1083       for (i=0;i<csize;i++) {
1084         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1085       }
1086       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1087       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1088     }
1089   } else {
1090     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1091     iscol_s = iscol;
1092   }
1093 
1094   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1095 
1096   if (!rsorted || !csorted) {
1097     Mat      new_mat;
1098     IS       is_perm_r,is_perm_c;
1099 
1100     if (!rsorted) {
1101       PetscInt *idxs_r,i;
1102       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1103       for (i=0;i<rsize;i++) {
1104         idxs_r[idxs_perm_r[i]] = i;
1105       }
1106       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1107       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1108     } else {
1109       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1110     }
1111     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1112 
1113     if (!csorted) {
1114       if (isrow_s == iscol_s) {
1115         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1116         is_perm_c = is_perm_r;
1117       } else {
1118         PetscInt *idxs_c,i;
1119         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1120         for (i=0;i<csize;i++) {
1121           idxs_c[idxs_perm_c[i]] = i;
1122         }
1123         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1124         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1125       }
1126     } else {
1127       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1128     }
1129     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1130 
1131     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1132     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1133     work_mat[0] = new_mat;
1134     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1135     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1136   }
1137 
1138   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1139   *B = work_mat[0];
1140   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1141   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1142   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1148 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1149 {
1150   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1151   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1152   Mat            new_mat;
1153   IS             is_local,is_global;
1154   PetscInt       local_size;
1155   PetscBool      isseqaij;
1156   PetscErrorCode ierr;
1157 
1158   PetscFunctionBegin;
1159   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1160   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1161   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1162   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1163   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1164   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1165   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1166 
1167   /* check */
1168   if (pcbddc->dbg_flag) {
1169     Vec       x,x_change;
1170     PetscReal error;
1171 
1172     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1173     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1174     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1175     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1176     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1177     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1178     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1179     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1180     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1181     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1182     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1183     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1184     ierr = VecDestroy(&x);CHKERRQ(ierr);
1185     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1186   }
1187 
1188   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1189   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1190   if (isseqaij) {
1191     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1192   } else {
1193     Mat work_mat;
1194     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1195     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1196     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1197   }
1198   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1199   /*
1200   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1201   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1202   */
1203   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1209 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1210 {
1211   PC_IS*         pcis = (PC_IS*)(pc->data);
1212   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1213   IS             is_aux1,is_aux2;
1214   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1215   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1216   PetscInt       vbs,bs;
1217   PetscBT        bitmask;
1218   PetscErrorCode ierr;
1219 
1220   PetscFunctionBegin;
1221   /*
1222     No need to setup local scatters if
1223       - primal space is unchanged
1224         AND
1225       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1226         AND
1227       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1228   */
1229   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1230     PetscFunctionReturn(0);
1231   }
1232   /* destroy old objects */
1233   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1234   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1235   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1236   /* Set Non-overlapping dimensions */
1237   n_B = pcis->n_B; n_D = pcis->n - n_B;
1238   n_vertices = pcbddc->n_actual_vertices;
1239   /* create auxiliary bitmask */
1240   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1241   for (i=0;i<n_vertices;i++) {
1242     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1243   }
1244 
1245   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1246   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1247   for (i=0, n_R=0; i<pcis->n; i++) {
1248     if (!PetscBTLookup(bitmask,i)) {
1249       idx_R_local[n_R] = i;
1250       n_R++;
1251     }
1252   }
1253 
1254   /* Block code */
1255   vbs = 1;
1256   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1257   if (bs>1 && !(n_vertices%bs)) {
1258     PetscBool is_blocked = PETSC_TRUE;
1259     PetscInt  *vary;
1260     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1261     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1262     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1263     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1264     for (i=0; i<n_vertices; i++) {
1265       if (vary[i]!=0 && vary[i]!=bs) {
1266         is_blocked = PETSC_FALSE;
1267         break;
1268       }
1269     }
1270     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1271       vbs = bs;
1272       for (i=0;i<n_R/vbs;i++) {
1273         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1274       }
1275     }
1276     ierr = PetscFree(vary);CHKERRQ(ierr);
1277   }
1278   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1279   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1280 
1281   /* print some info if requested */
1282   if (pcbddc->dbg_flag) {
1283     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1284     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1285     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1286     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1287     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1288     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1289     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1290     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1291   }
1292 
1293   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1294   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1295   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1296   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1297   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1298   for (i=0; i<n_D; i++) {
1299     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1300   }
1301   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1302   for (i=0, j=0; i<n_R; i++) {
1303     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1304       aux_array1[j++] = i;
1305     }
1306   }
1307   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1308   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1309   for (i=0, j=0; i<n_B; i++) {
1310     if (!PetscBTLookup(bitmask,is_indices[i])) {
1311       aux_array2[j++] = i;
1312     }
1313   }
1314   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1315   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1316   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1317   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1318   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1319 
1320   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1321     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1322     for (i=0, j=0; i<n_R; i++) {
1323       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1324         aux_array1[j++] = i;
1325       }
1326     }
1327     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1328     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1329     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1330   }
1331   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1332   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1339 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1340 {
1341   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1342   PC_IS          *pcis = (PC_IS*)pc->data;
1343   PC             pc_temp;
1344   Mat            A_RR;
1345   MatReuse       reuse;
1346   PetscScalar    m_one = -1.0;
1347   PetscReal      value;
1348   PetscInt       n_D,n_R,ibs,mbs;
1349   PetscBool      use_exact,use_exact_reduced,issbaij;
1350   PetscErrorCode ierr;
1351   /* prefixes stuff */
1352   char           dir_prefix[256],neu_prefix[256],str_level[16];
1353   size_t         len;
1354 
1355   PetscFunctionBegin;
1356 
1357   /* compute prefixes */
1358   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1359   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1360   if (!pcbddc->current_level) {
1361     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1362     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1363     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1364     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1367     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1368     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1369     len -= 15; /* remove "pc_bddc_coarse_" */
1370     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1371     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1372     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1373     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1374     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1375     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1376     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1377     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1378   }
1379 
1380   /* DIRICHLET PROBLEM */
1381   if (dirichlet) {
1382     /* Matrix for Dirichlet problem is pcis->A_II */
1383     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1384     if (!pcbddc->ksp_D) { /* create object if not yet build */
1385       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1386       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1387       /* default */
1388       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1389       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1390       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1391       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1392       if (issbaij) {
1393         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1394       } else {
1395         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1396       }
1397       /* Allow user's customization */
1398       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1399       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1400     }
1401     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1402     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1403     if (!n_D) {
1404       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1405       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1406     }
1407     /* Set Up KSP for Dirichlet problem of BDDC */
1408     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1409     /* set ksp_D into pcis data */
1410     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1411     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1412     pcis->ksp_D = pcbddc->ksp_D;
1413   }
1414 
1415   /* NEUMANN PROBLEM */
1416   A_RR = 0;
1417   if (neumann) {
1418     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1419     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1420     if (pcbddc->ksp_R) { /* already created ksp */
1421       PetscInt nn_R;
1422       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1423       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1424       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1425       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1426         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1427         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1428         reuse = MAT_INITIAL_MATRIX;
1429       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1430         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1431           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1432           reuse = MAT_INITIAL_MATRIX;
1433         } else { /* safe to reuse the matrix */
1434           reuse = MAT_REUSE_MATRIX;
1435         }
1436       }
1437       /* last check */
1438       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1439         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1440         reuse = MAT_INITIAL_MATRIX;
1441       }
1442     } else { /* first time, so we need to create the matrix */
1443       reuse = MAT_INITIAL_MATRIX;
1444     }
1445     /* extract A_RR */
1446     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1447     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1448     if (ibs != mbs) {
1449       Mat newmat;
1450       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1451       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1452       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1453     } else {
1454       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1455     }
1456     if (!pcbddc->ksp_R) { /* create object if not present */
1457       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1458       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1459       /* default */
1460       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1461       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1462       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1463       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1464       if (issbaij) {
1465         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1466       } else {
1467         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1468       }
1469       /* Allow user's customization */
1470       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1471       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1472     }
1473     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1474     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1475     if (!n_R) {
1476       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1477       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1478     }
1479     /* Set Up KSP for Neumann problem of BDDC */
1480     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1481   }
1482 
1483   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1484   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1485     if (pcbddc->dbg_flag) {
1486       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1487       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1488       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1489     }
1490     if (dirichlet) { /* Dirichlet */
1491       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1492       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1493       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1494       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1495       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1496       /* need to be adapted? */
1497       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1498       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1499       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1500       /* print info */
1501       if (pcbddc->dbg_flag) {
1502         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1503         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1504       }
1505       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1506         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1507       }
1508     }
1509     if (neumann) { /* Neumann */
1510       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1511       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1512       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1513       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1514       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1515       /* need to be adapted? */
1516       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1517       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1518       /* print info */
1519       if (pcbddc->dbg_flag) {
1520         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1521         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1522       }
1523       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1524         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1525       }
1526     }
1527   }
1528   /* free Neumann problem's matrix */
1529   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1535 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1536 {
1537   PetscErrorCode ierr;
1538   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1539 
1540   PetscFunctionBegin;
1541   if (applytranspose) {
1542     if (pcbddc->local_auxmat1) {
1543       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1544       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1545     }
1546     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1547   } else {
1548     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1549     if (pcbddc->local_auxmat1) {
1550       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1551       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1552     }
1553   }
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1558 #undef __FUNCT__
1559 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1560 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1561 {
1562   PetscErrorCode ierr;
1563   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1564   PC_IS*            pcis = (PC_IS*)  (pc->data);
1565   const PetscScalar zero = 0.0;
1566 
1567   PetscFunctionBegin;
1568   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1569   if (applytranspose) {
1570     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1571     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1572   } else {
1573     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1574     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1575   }
1576   /* start communications from local primal nodes to rhs of coarse solver */
1577   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1578   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1580 
1581   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1582   /* TODO remove null space when doing multilevel */
1583   if (pcbddc->coarse_ksp) {
1584     if (applytranspose) {
1585       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1586     } else {
1587       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1588     }
1589   }
1590 
1591   /* Local solution on R nodes */
1592   if (pcis->n) {
1593     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1594     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1595     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1596     if (pcbddc->switch_static) {
1597       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1598       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1599     }
1600     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1601     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1602     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1603     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604     if (pcbddc->switch_static) {
1605       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1607     }
1608   }
1609 
1610   /* communications from coarse sol to local primal nodes */
1611   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1612   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1613 
1614   /* Sum contributions from two levels */
1615   if (applytranspose) {
1616     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1617     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1618   } else {
1619     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1620     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1621   }
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 #undef __FUNCT__
1626 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1627 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1628 {
1629   PetscErrorCode ierr;
1630   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1631   PetscScalar    *array;
1632   Vec            from,to;
1633 
1634   PetscFunctionBegin;
1635   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1636     from = pcbddc->coarse_vec;
1637     to = pcbddc->vec1_P;
1638     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1639       Vec tvec;
1640 
1641       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1642       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1643       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1644       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1645       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1646       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1647     }
1648   } else { /* from local to global -> put data in coarse right hand side */
1649     from = pcbddc->vec1_P;
1650     to = pcbddc->coarse_vec;
1651   }
1652   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1658 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1659 {
1660   PetscErrorCode ierr;
1661   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1662   PetscScalar    *array;
1663   Vec            from,to;
1664 
1665   PetscFunctionBegin;
1666   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1667     from = pcbddc->coarse_vec;
1668     to = pcbddc->vec1_P;
1669   } else { /* from local to global -> put data in coarse right hand side */
1670     from = pcbddc->vec1_P;
1671     to = pcbddc->coarse_vec;
1672   }
1673   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1674   if (smode == SCATTER_FORWARD) {
1675     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1676       Vec tvec;
1677 
1678       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1679       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1680       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1681       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1682     }
1683   } else {
1684     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1685      ierr = VecResetArray(from);CHKERRQ(ierr);
1686     }
1687   }
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 /* uncomment for testing purposes */
1692 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1693 #undef __FUNCT__
1694 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1695 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1696 {
1697   PetscErrorCode    ierr;
1698   PC_IS*            pcis = (PC_IS*)(pc->data);
1699   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1700   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1701   /* one and zero */
1702   PetscScalar       one=1.0,zero=0.0;
1703   /* space to store constraints and their local indices */
1704   PetscScalar       *temp_quadrature_constraint;
1705   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1706   /* iterators */
1707   PetscInt          i,j,k,total_counts,temp_start_ptr;
1708   /* BLAS integers */
1709   PetscBLASInt      lwork,lierr;
1710   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1711   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1712   /* reuse */
1713   PetscInt          olocal_primal_size;
1714   PetscInt          *oprimal_indices_local_idxs;
1715   /* change of basis */
1716   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1717   PetscBool         boolforchange,qr_needed;
1718   PetscBT           touched,change_basis,qr_needed_idx;
1719   /* auxiliary stuff */
1720   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1721   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1722   PetscScalar       *temp_quadrature_constraint_work=NULL;
1723   /* some quantities */
1724   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1725   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1726 
1727   PetscFunctionBegin;
1728   /* Destroy Mat objects computed previously */
1729   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1730   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1731 
1732   /* print some info */
1733   if (pcbddc->dbg_flag) {
1734     IS       vertices;
1735     PetscInt nv,nedges,nfaces;
1736     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1737     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1738     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1739     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1740     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1741     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1742     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_edges);CHKERRQ(ierr);
1743     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_faces);CHKERRQ(ierr);
1744     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1745   }
1746 
1747   if (!pcbddc->adaptive_selection) {
1748     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1749     MatNullSpace nearnullsp;
1750     const Vec    *nearnullvecs;
1751     Vec          *localnearnullsp;
1752     PetscScalar  *array;
1753     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1754     PetscBool    nnsp_has_cnst;
1755     /* LAPACK working arrays for SVD or POD */
1756     PetscBool    skip_lapack;
1757     PetscScalar  *work;
1758     PetscReal    *singular_vals;
1759 #if defined(PETSC_USE_COMPLEX)
1760     PetscReal    *rwork;
1761 #endif
1762 #if defined(PETSC_MISSING_LAPACK_GESVD)
1763     PetscScalar  *temp_basis,*correlation_mat;
1764 #else
1765     PetscBLASInt dummy_int;
1766     PetscScalar  dummy_scalar;
1767 #endif
1768 
1769     /* Get index sets for faces, edges and vertices from graph */
1770     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1771     /* free unneeded index sets */
1772     if (!pcbddc->use_vertices) {
1773       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1774     }
1775     if (!pcbddc->use_edges) {
1776       for (i=0;i<n_ISForEdges;i++) {
1777         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1778       }
1779       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1780       n_ISForEdges = 0;
1781     }
1782     if (!pcbddc->use_faces) {
1783       for (i=0;i<n_ISForFaces;i++) {
1784         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1785       }
1786       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1787       n_ISForFaces = 0;
1788     }
1789     /* HACKS (the following two blocks of code) */
1790     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1791       pcbddc->use_change_of_basis = PETSC_TRUE;
1792       if (!ISForEdges) {
1793         pcbddc->use_change_on_faces = PETSC_TRUE;
1794       }
1795     }
1796     if (pcbddc->NullSpace) {
1797       /* use_change_of_basis should be consistent among processors */
1798       PetscBool tbool[2],gbool[2];
1799       tbool [0] = pcbddc->use_change_of_basis;
1800       tbool [1] = pcbddc->use_change_on_faces;
1801       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1802       pcbddc->use_change_of_basis = gbool[0];
1803       pcbddc->use_change_on_faces = gbool[1];
1804     }
1805 
1806     /* check if near null space is attached to global mat */
1807     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1808     if (nearnullsp) {
1809       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1810       /* remove any stored info */
1811       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1812       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1813       /* store information for BDDC solver reuse */
1814       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1815       pcbddc->onearnullspace = nearnullsp;
1816       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1817       for (i=0;i<nnsp_size;i++) {
1818         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1819       }
1820     } else { /* if near null space is not provided BDDC uses constants by default */
1821       nnsp_size = 0;
1822       nnsp_has_cnst = PETSC_TRUE;
1823     }
1824     /* get max number of constraints on a single cc */
1825     max_constraints = nnsp_size;
1826     if (nnsp_has_cnst) max_constraints++;
1827 
1828     /*
1829          Evaluate maximum storage size needed by the procedure
1830          - temp_indices will contain start index of each constraint stored as follows
1831          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1832          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1833          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1834                                                                                                                                                            */
1835     total_counts = n_ISForFaces+n_ISForEdges;
1836     total_counts *= max_constraints;
1837     n_vertices = 0;
1838     if (ISForVertices) {
1839       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1840     }
1841     total_counts += n_vertices;
1842     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1843     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1844     total_counts = 0;
1845     max_size_of_constraint = 0;
1846     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1847       if (i<n_ISForEdges) {
1848         used_IS = &ISForEdges[i];
1849       } else {
1850         used_IS = &ISForFaces[i-n_ISForEdges];
1851       }
1852       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1853       total_counts += j;
1854       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1855     }
1856     total_counts *= max_constraints;
1857     total_counts += n_vertices;
1858     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1859     /* get local part of global near null space vectors */
1860     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1861     for (k=0;k<nnsp_size;k++) {
1862       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1863       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1864       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1865     }
1866 
1867     /* whether or not to skip lapack calls */
1868     skip_lapack = PETSC_TRUE;
1869     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1870 
1871     /* allocate some auxiliary stuff */
1872     if (!skip_lapack || pcbddc->use_qr_single) {
1873       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1874     } else {
1875       gidxs = NULL;
1876       permutation = NULL;
1877       temp_indices_to_constraint_work = NULL;
1878       temp_quadrature_constraint_work = NULL;
1879     }
1880 
1881     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1882     if (!skip_lapack) {
1883       PetscScalar temp_work;
1884 
1885 #if defined(PETSC_MISSING_LAPACK_GESVD)
1886       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1887       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1888       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1889       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1890 #if defined(PETSC_USE_COMPLEX)
1891       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1892 #endif
1893       /* now we evaluate the optimal workspace using query with lwork=-1 */
1894       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1895       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1896       lwork = -1;
1897       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1898 #if !defined(PETSC_USE_COMPLEX)
1899       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1900 #else
1901       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1902 #endif
1903       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1904       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1905 #else /* on missing GESVD */
1906       /* SVD */
1907       PetscInt max_n,min_n;
1908       max_n = max_size_of_constraint;
1909       min_n = max_constraints;
1910       if (max_size_of_constraint < max_constraints) {
1911         min_n = max_size_of_constraint;
1912         max_n = max_constraints;
1913       }
1914       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1915 #if defined(PETSC_USE_COMPLEX)
1916       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1917 #endif
1918       /* now we evaluate the optimal workspace using query with lwork=-1 */
1919       lwork = -1;
1920       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1921       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1922       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1923       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1924 #if !defined(PETSC_USE_COMPLEX)
1925       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1926 #else
1927       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1928 #endif
1929       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1930       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1931 #endif /* on missing GESVD */
1932       /* Allocate optimal workspace */
1933       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1934       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1935     }
1936     /* Now we can loop on constraining sets */
1937     total_counts = 0;
1938     temp_indices[0] = 0;
1939     /* vertices */
1940     if (ISForVertices) {
1941       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1942       if (nnsp_has_cnst) { /* consider all vertices */
1943         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1944         for (i=0;i<n_vertices;i++) {
1945           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1946           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1947           total_counts++;
1948         }
1949       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1950         PetscBool used_vertex;
1951         for (i=0;i<n_vertices;i++) {
1952           used_vertex = PETSC_FALSE;
1953           k = 0;
1954           while (!used_vertex && k<nnsp_size) {
1955             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1956             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1957               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1958               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1959               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1960               total_counts++;
1961               used_vertex = PETSC_TRUE;
1962             }
1963             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1964             k++;
1965           }
1966         }
1967       }
1968       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1969       n_vertices = total_counts;
1970     }
1971 
1972     /* edges and faces */
1973     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1974       if (ncc<n_ISForEdges) {
1975         used_IS = &ISForEdges[ncc];
1976         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1977       } else {
1978         used_IS = &ISForFaces[ncc-n_ISForEdges];
1979         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1980       }
1981       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1982       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1983       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1984       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1985       /* change of basis should not be performed on local periodic nodes */
1986       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1987       if (nnsp_has_cnst) {
1988         PetscScalar quad_value;
1989         temp_constraints++;
1990         if (!pcbddc->use_nnsp_true) {
1991           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1992         } else {
1993           quad_value = 1.0;
1994         }
1995         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1996         for (j=0;j<size_of_constraint;j++) {
1997           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1998         }
1999         /* sort by global ordering if using lapack subroutines (not needed!) */
2000         if (!skip_lapack || pcbddc->use_qr_single) {
2001           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2002           for (j=0;j<size_of_constraint;j++) {
2003             permutation[j]=j;
2004           }
2005           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2006           for (j=0;j<size_of_constraint;j++) {
2007             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2008           }
2009           for (j=0;j<size_of_constraint;j++) {
2010             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2011             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2012           }
2013           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2014           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2015         }
2016         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2017         total_counts++;
2018       }
2019       for (k=0;k<nnsp_size;k++) {
2020         PetscReal real_value;
2021         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2022         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2023         for (j=0;j<size_of_constraint;j++) {
2024           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2025         }
2026         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2027         /* check if array is null on the connected component */
2028         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2029         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2030         if (real_value > 0.0) { /* keep indices and values */
2031           /* sort by global ordering if using lapack subroutines */
2032           if (!skip_lapack || pcbddc->use_qr_single) {
2033             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2034             for (j=0;j<size_of_constraint;j++) {
2035               permutation[j]=j;
2036             }
2037             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2038             for (j=0;j<size_of_constraint;j++) {
2039               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2040               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2041             }
2042             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2043             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2044           }
2045           temp_constraints++;
2046           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2047           total_counts++;
2048         }
2049       }
2050       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2051       valid_constraints = temp_constraints;
2052       if (!pcbddc->use_nnsp_true && temp_constraints) {
2053         if (temp_constraints == 1) { /* just normalize the constraint */
2054           PetscScalar norm;
2055           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2056           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2057           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2058           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2059         } else { /* perform SVD */
2060           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2061 
2062 #if defined(PETSC_MISSING_LAPACK_GESVD)
2063           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2064              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2065              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2066                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2067                 from that computed using LAPACKgesvd
2068              -> This is due to a different computation of eigenvectors in LAPACKheev
2069              -> The quality of the POD-computed basis will be the same */
2070           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2071           /* Store upper triangular part of correlation matrix */
2072           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2073           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2074           for (j=0;j<temp_constraints;j++) {
2075             for (k=0;k<j+1;k++) {
2076               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2077             }
2078           }
2079           /* compute eigenvalues and eigenvectors of correlation matrix */
2080           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2081           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2082 #if !defined(PETSC_USE_COMPLEX)
2083           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2084 #else
2085           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2086 #endif
2087           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2088           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2089           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2090           j = 0;
2091           while (j < temp_constraints && singular_vals[j] < tol) j++;
2092           total_counts = total_counts-j;
2093           valid_constraints = temp_constraints-j;
2094           /* scale and copy POD basis into used quadrature memory */
2095           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2096           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2097           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2098           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2099           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2100           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2101           if (j<temp_constraints) {
2102             PetscInt ii;
2103             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2104             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2105             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2106             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2107             for (k=0;k<temp_constraints-j;k++) {
2108               for (ii=0;ii<size_of_constraint;ii++) {
2109                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2110               }
2111             }
2112           }
2113 #else  /* on missing GESVD */
2114           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2115           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2116           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2117           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2118 #if !defined(PETSC_USE_COMPLEX)
2119           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2120 #else
2121           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2122 #endif
2123           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2124           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2125           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2126           k = temp_constraints;
2127           if (k > size_of_constraint) k = size_of_constraint;
2128           j = 0;
2129           while (j < k && singular_vals[k-j-1] < tol) j++;
2130           valid_constraints = k-j;
2131           total_counts = total_counts-temp_constraints+valid_constraints;
2132 #endif /* on missing GESVD */
2133         }
2134       }
2135       /* setting change_of_basis flag is safe now */
2136       if (boolforchange) {
2137         for (j=0;j<valid_constraints;j++) {
2138           PetscBTSet(change_basis,total_counts-j-1);
2139         }
2140       }
2141     }
2142     /* free workspace */
2143     if (!skip_lapack || pcbddc->use_qr_single) {
2144       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2145     }
2146     if (!skip_lapack) {
2147       ierr = PetscFree(work);CHKERRQ(ierr);
2148 #if defined(PETSC_USE_COMPLEX)
2149       ierr = PetscFree(rwork);CHKERRQ(ierr);
2150 #endif
2151       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2152 #if defined(PETSC_MISSING_LAPACK_GESVD)
2153       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2154       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2155 #endif
2156     }
2157     for (k=0;k<nnsp_size;k++) {
2158       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2159     }
2160     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2161     /* free index sets of faces, edges and vertices */
2162     for (i=0;i<n_ISForFaces;i++) {
2163       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2164     }
2165     if (n_ISForFaces) {
2166       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2167     }
2168     for (i=0;i<n_ISForEdges;i++) {
2169       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2170     }
2171     if (n_ISForEdges) {
2172       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2173     }
2174     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2175   } else {
2176     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2177     PetscInt        cum = 0;
2178 
2179     total_counts = 0;
2180     n_vertices = 0;
2181     if (sub_schurs->is_Ej_com) {
2182       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2183     }
2184     max_constraints = 0;
2185     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2186       total_counts += pcbddc->adaptive_constraints_n[i];
2187       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2188     }
2189     temp_indices = pcbddc->adaptive_constraints_ptrs;
2190     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2191     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2192 
2193 #if 0
2194     printf("Found %d totals\n",total_counts);
2195     for (i=0;i<total_counts;i++) {
2196       printf("const %d, start %d",i,temp_indices[i]);
2197       printf(" end %d:\n",temp_indices[i+1]);
2198       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2199         printf("  idxs %d",temp_indices_to_constraint[j]);
2200         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2201       }
2202     }
2203     for (i=0;i<n_vertices;i++) {
2204       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2205     }
2206     for (i=0;i<sub_schurs->n_subs;i++) {
2207       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2208     }
2209 #endif
2210 
2211     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2212     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2213     /* Change of basis */
2214     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2215     if (pcbddc->use_change_of_basis) {
2216       cum = n_vertices;
2217       for (i=0;i<sub_schurs->n_subs;i++) {
2218         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2219           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2220             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2221           }
2222         }
2223         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2224       }
2225     }
2226   }
2227 
2228   /* map temp_indices_to_constraint in boundary numbering */
2229   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2230   if (i != temp_indices[total_counts]) {
2231     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2232   }
2233 
2234   /* set quantities in pcbddc data structure and store previous primal size */
2235   /* n_vertices defines the number of subdomain corners in the primal space */
2236   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2237   olocal_primal_size = pcbddc->local_primal_size;
2238   pcbddc->local_primal_size = total_counts;
2239   pcbddc->n_vertices = n_vertices;
2240   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2241 
2242   /* Create constraint matrix */
2243   /* The constraint matrix is used to compute the l2g map of primal dofs */
2244   /* so we need to set it up properly either with or without change of basis */
2245   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2246   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2247   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2248   /* array to compute a local numbering of constraints : vertices first then constraints */
2249   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2250   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2251   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2252   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2253   /* auxiliary stuff for basis change */
2254   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2255   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2256 
2257   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2258   total_primal_vertices=0;
2259   for (i=0;i<pcbddc->local_primal_size;i++) {
2260     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2261     if (size_of_constraint == 1) {
2262       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2263       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2264       aux_primal_minloc[total_primal_vertices]=0;
2265       total_primal_vertices++;
2266     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2267       PetscInt min_loc,min_index;
2268       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2269       /* find first untouched local node */
2270       k = 0;
2271       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2272       min_index = global_indices[k];
2273       min_loc = k;
2274       /* search the minimum among global nodes already untouched on the cc */
2275       for (k=1;k<size_of_constraint;k++) {
2276         /* there can be more than one constraint on a single connected component */
2277         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2278           min_index = global_indices[k];
2279           min_loc = k;
2280         }
2281       }
2282       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2283       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2284       aux_primal_minloc[total_primal_vertices]=min_loc;
2285       total_primal_vertices++;
2286     }
2287   }
2288   /* determine if a QR strategy is needed for change of basis */
2289   qr_needed = PETSC_FALSE;
2290   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2291   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2292     if (PetscBTLookup(change_basis,i)) {
2293       if (!pcbddc->use_qr_single) {
2294         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2295         j = 0;
2296         for (k=0;k<size_of_constraint;k++) {
2297           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2298             j++;
2299           }
2300         }
2301         /* found more than one primal dof on the cc */
2302         if (j > 1) {
2303           PetscBTSet(qr_needed_idx,i);
2304           qr_needed = PETSC_TRUE;
2305         }
2306       } else {
2307         PetscBTSet(qr_needed_idx,i);
2308         qr_needed = PETSC_TRUE;
2309       }
2310     }
2311   }
2312   /* free workspace */
2313   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2314 
2315   /* permute indices in order to have a sorted set of vertices */
2316   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2317 
2318   /* nonzero structure of constraint matrix */
2319   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2320   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2321   j=total_primal_vertices;
2322   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2323     if (!PetscBTLookup(change_basis,i)) {
2324       nnz[j]=temp_indices[i+1]-temp_indices[i];
2325       j++;
2326     }
2327   }
2328   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2329   ierr = PetscFree(nnz);CHKERRQ(ierr);
2330   /* set values in constraint matrix */
2331   for (i=0;i<total_primal_vertices;i++) {
2332     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2333   }
2334   total_counts = total_primal_vertices;
2335   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2336     if (!PetscBTLookup(change_basis,i)) {
2337       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2338       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2339       total_counts++;
2340     }
2341   }
2342   /* assembling */
2343   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2344   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2345   /*
2346   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2347   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2348   */
2349   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2350   if (pcbddc->use_change_of_basis) {
2351     /* dual and primal dofs on a single cc */
2352     PetscInt     dual_dofs,primal_dofs;
2353     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2354     PetscInt     primal_counter;
2355     /* working stuff for GEQRF */
2356     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2357     PetscBLASInt lqr_work;
2358     /* working stuff for UNGQR */
2359     PetscScalar  *gqr_work,lgqr_work_t;
2360     PetscBLASInt lgqr_work;
2361     /* working stuff for TRTRS */
2362     PetscScalar  *trs_rhs;
2363     PetscBLASInt Blas_NRHS;
2364     /* pointers for values insertion into change of basis matrix */
2365     PetscInt     *start_rows,*start_cols;
2366     PetscScalar  *start_vals;
2367     /* working stuff for values insertion */
2368     PetscBT      is_primal;
2369     /* matrix sizes */
2370     PetscInt     global_size,local_size;
2371     /* temporary change of basis */
2372     Mat          localChangeOfBasisMatrix;
2373     /* extra space for debugging */
2374     PetscScalar  *dbg_work;
2375 
2376     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2377     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2378     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2379     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2380     /* nonzeros for local mat */
2381     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2382     for (i=0;i<pcis->n;i++) nnz[i]=1;
2383     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2384       if (PetscBTLookup(change_basis,i)) {
2385         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2386         if (PetscBTLookup(qr_needed_idx,i)) {
2387           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2388         } else {
2389           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2390           /* get local primal index on the cc */
2391           j = 0;
2392           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2393           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2394         }
2395       }
2396     }
2397     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2398     ierr = PetscFree(nnz);CHKERRQ(ierr);
2399     /* Set initial identity in the matrix */
2400     for (i=0;i<pcis->n;i++) {
2401       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2402     }
2403 
2404     if (pcbddc->dbg_flag) {
2405       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2406       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2407     }
2408 
2409 
2410     /* Now we loop on the constraints which need a change of basis */
2411     /*
2412        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2413        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2414 
2415        Basic blocks of change of basis matrix T computed by
2416 
2417           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2418 
2419             | 1        0   ...        0         s_1/S |
2420             | 0        1   ...        0         s_2/S |
2421             |              ...                        |
2422             | 0        ...            1     s_{n-1}/S |
2423             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2424 
2425             with S = \sum_{i=1}^n s_i^2
2426             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2427                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2428 
2429           - QR decomposition of constraints otherwise
2430     */
2431     if (qr_needed) {
2432       /* space to store Q */
2433       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2434       /* first we issue queries for optimal work */
2435       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2436       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2437       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2438       lqr_work = -1;
2439       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2440       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2441       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2442       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2443       lgqr_work = -1;
2444       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2445       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2446       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2447       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2448       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2449       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2450       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2451       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2452       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2453       /* array to store scaling factors for reflectors */
2454       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2455       /* array to store rhs and solution of triangular solver */
2456       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2457       /* allocating workspace for check */
2458       if (pcbddc->dbg_flag) {
2459         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2460       }
2461     }
2462     /* array to store whether a node is primal or not */
2463     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2464     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2465     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2466     if (i != total_primal_vertices) {
2467       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2468     }
2469     for (i=0;i<total_primal_vertices;i++) {
2470       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2471     }
2472     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2473 
2474     /* loop on constraints and see whether or not they need a change of basis and compute it */
2475     /* -> using implicit ordering contained in temp_indices data */
2476     total_counts = pcbddc->n_vertices;
2477     primal_counter = total_counts;
2478     while (total_counts<pcbddc->local_primal_size) {
2479       primal_dofs = 1;
2480       if (PetscBTLookup(change_basis,total_counts)) {
2481         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2482         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2483           primal_dofs++;
2484         }
2485         /* get constraint info */
2486         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2487         dual_dofs = size_of_constraint-primal_dofs;
2488 
2489         if (pcbddc->dbg_flag) {
2490           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2491         }
2492 
2493         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2494 
2495           /* copy quadrature constraints for change of basis check */
2496           if (pcbddc->dbg_flag) {
2497             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2498           }
2499           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2500           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2501 
2502           /* compute QR decomposition of constraints */
2503           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2504           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2505           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2506           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2507           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2508           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2509           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2510 
2511           /* explictly compute R^-T */
2512           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2513           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2514           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2515           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2516           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2517           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2518           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2519           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2520           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2521           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2522 
2523           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2524           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2525           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2526           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2527           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2528           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2529           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2530           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2531           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2532 
2533           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2534              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2535              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2536           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2537           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2538           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2539           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2540           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2541           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2542           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2543           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2544           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2545           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2546 
2547           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2548           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2549           /* insert cols for primal dofs */
2550           for (j=0;j<primal_dofs;j++) {
2551             start_vals = &qr_basis[j*size_of_constraint];
2552             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2553             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2554           }
2555           /* insert cols for dual dofs */
2556           for (j=0,k=0;j<dual_dofs;k++) {
2557             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2558               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2559               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2560               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2561               j++;
2562             }
2563           }
2564 
2565           /* check change of basis */
2566           if (pcbddc->dbg_flag) {
2567             PetscInt   ii,jj;
2568             PetscBool valid_qr=PETSC_TRUE;
2569             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2570             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2571             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2572             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2573             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2574             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2575             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2576             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2577             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2578             for (jj=0;jj<size_of_constraint;jj++) {
2579               for (ii=0;ii<primal_dofs;ii++) {
2580                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2581                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2582               }
2583             }
2584             if (!valid_qr) {
2585               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2586               for (jj=0;jj<size_of_constraint;jj++) {
2587                 for (ii=0;ii<primal_dofs;ii++) {
2588                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2589                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2590                   }
2591                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2592                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2593                   }
2594                 }
2595               }
2596             } else {
2597               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2598             }
2599           }
2600         } else { /* simple transformation block */
2601           PetscInt    row,col;
2602           PetscScalar val,norm;
2603 
2604           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2605           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2606           for (j=0;j<size_of_constraint;j++) {
2607             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2608             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2609             if (!PetscBTLookup(is_primal,row_B)) {
2610               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2611               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2612               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2613             } else {
2614               for (k=0;k<size_of_constraint;k++) {
2615                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2616                 if (row != col) {
2617                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2618                 } else {
2619                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2620                 }
2621                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2622               }
2623             }
2624           }
2625           if (pcbddc->dbg_flag) {
2626             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2627           }
2628         }
2629         /* increment primal counter */
2630         primal_counter += primal_dofs;
2631       } else {
2632         if (pcbddc->dbg_flag) {
2633           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2634         }
2635       }
2636       /* increment constraint counter total_counts */
2637       total_counts += primal_dofs;
2638     }
2639 
2640     /* free workspace */
2641     if (qr_needed) {
2642       if (pcbddc->dbg_flag) {
2643         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2644       }
2645       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2646       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2647       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2648       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2649       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2650     }
2651     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2652     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2653     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2654 
2655     /* assembling of global change of variable */
2656     {
2657       Mat      tmat;
2658       PetscInt bs;
2659 
2660       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2661       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2662       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2663       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2664       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2665       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2666       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2667       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2668       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2669       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2670       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2671       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2672       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2673       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2674       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2675       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2676       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2677       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2678     }
2679     /* check */
2680     if (pcbddc->dbg_flag) {
2681       PetscReal error;
2682       Vec       x,x_change;
2683 
2684       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2685       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2686       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2687       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2688       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2689       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2690       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2691       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2692       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2693       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2694       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2695       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2696       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2697       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2698       ierr = VecDestroy(&x);CHKERRQ(ierr);
2699       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2700     }
2701 
2702     /* adapt sub_schurs computed (if any) */
2703     if (pcbddc->use_deluxe_scaling) {
2704       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2705       if (sub_schurs->n_subs_par_g) {
2706         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2707       }
2708       if (sub_schurs->S_Ej_all) {
2709         Mat S_1,S_2,tmat;
2710         IS is_all_N;
2711 
2712         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2713         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2714         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2715         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2716         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2717         sub_schurs->S_Ej_all = S_1;
2718         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2719         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2720         sub_schurs->sum_S_Ej_all = S_2;
2721         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2722       }
2723     }
2724     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2725   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2726     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2727     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2728   }
2729 
2730   /* set up change of basis context */
2731   if (pcbddc->ChangeOfBasisMatrix) {
2732     PCBDDCChange_ctx change_ctx;
2733 
2734     if (!pcbddc->new_global_mat) {
2735       PetscInt global_size,local_size;
2736 
2737       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2738       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2739       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2740       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2741       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2742       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2743       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2744       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2745       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2746     } else {
2747       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2748       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2749       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2750     }
2751     if (!pcbddc->user_ChangeOfBasisMatrix) {
2752       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2753       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2754     } else {
2755       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2756       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2757     }
2758     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2759     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2760     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2761     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2762   }
2763 
2764   /* get indices in local ordering for vertices and constraints */
2765   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2766     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2767     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2768   }
2769   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2770   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2771   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2772   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2773   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2774   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2775   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2776   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2777   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2778   /* set quantities in PCBDDC data struct */
2779   pcbddc->n_actual_vertices = i;
2780   /* check if a new primal space has been introduced */
2781   pcbddc->new_primal_space_local = PETSC_TRUE;
2782   if (olocal_primal_size == pcbddc->local_primal_size) {
2783     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2784     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2785     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2786   }
2787   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2788   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2789 
2790   /* flush dbg viewer */
2791   if (pcbddc->dbg_flag) {
2792     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2793   }
2794 
2795   /* free workspace */
2796   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2797   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2798   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2799   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2800   if (!pcbddc->adaptive_selection) {
2801     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2802     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2803   } else {
2804     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2805                       pcbddc->adaptive_constraints_ptrs,
2806                       pcbddc->adaptive_constraints_idxs,
2807                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2808     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2809   }
2810   PetscFunctionReturn(0);
2811 }
2812 
2813 #undef __FUNCT__
2814 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2815 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2816 {
2817   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2818   PC_IS       *pcis = (PC_IS*)pc->data;
2819   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2820   PetscInt    ierr,i,vertex_size;
2821   PetscViewer viewer=pcbddc->dbg_viewer;
2822 
2823   PetscFunctionBegin;
2824   /* Reset previously computed graph */
2825   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2826   /* Init local Graph struct */
2827   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2828 
2829   /* Check validity of the csr graph passed in by the user */
2830   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2831     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2832   }
2833 
2834   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2835   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2836     PetscInt *xadj,*adjncy;
2837     PetscInt nvtxs;
2838 
2839     if (pcbddc->use_local_adj) {
2840       PetscBool flg_row=PETSC_FALSE;
2841 
2842       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2843       if (flg_row) {
2844         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2845         pcbddc->computed_rowadj = PETSC_TRUE;
2846       }
2847       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2848     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2849       IS                     is_dummy;
2850       ISLocalToGlobalMapping l2gmap_dummy;
2851       PetscInt               j,sum;
2852       PetscInt               *cxadj,*cadjncy;
2853       const PetscInt         *idxs;
2854       PCBDDCGraph            graph;
2855       PetscBT                is_on_boundary;
2856 
2857       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2858       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2859       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2860       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2861       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2862       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2863       graph->xadj = xadj;
2864       graph->adjncy = adjncy;
2865       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2866       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2867 
2868       if (pcbddc->dbg_flag) {
2869         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2870         for (i=0;i<graph->ncc;i++) {
2871           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2872         }
2873         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2874       }
2875 
2876       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2877       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2878       for (i=0;i<pcis->n_B;i++) {
2879         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2880       }
2881       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2882 
2883       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2884       sum = 0;
2885       for (i=0;i<graph->ncc;i++) {
2886         PetscInt sizecc = 0;
2887         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2888           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2889             sizecc++;
2890           }
2891         }
2892         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2893           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2894             cxadj[graph->queue[j]] = sizecc;
2895           }
2896         }
2897         sum += sizecc*sizecc;
2898       }
2899       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2900       sum = 0;
2901       for (i=0;i<nvtxs;i++) {
2902         PetscInt temp = cxadj[i];
2903         cxadj[i] = sum;
2904         sum += temp;
2905       }
2906       cxadj[nvtxs] = sum;
2907       for (i=0;i<graph->ncc;i++) {
2908         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2909           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2910             PetscInt k,sizecc = 0;
2911             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2912               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2913                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2914                 sizecc++;
2915               }
2916             }
2917           }
2918         }
2919       }
2920       if (nvtxs) {
2921         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2922       } else {
2923         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2924         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2925       }
2926       graph->xadj = 0;
2927       graph->adjncy = 0;
2928       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2929       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2930     }
2931   }
2932 
2933   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2934   vertex_size = 1;
2935   if (pcbddc->user_provided_isfordofs) {
2936     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2937       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2938       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2939         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2940         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2941       }
2942       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2943       pcbddc->n_ISForDofs = 0;
2944       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2945     }
2946     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2947     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2948   } else {
2949     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2950       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2951       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2952       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2953         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2954       }
2955     }
2956   }
2957 
2958   /* Setup of Graph */
2959   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2960     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2961   }
2962   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2963     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2964   }
2965   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2966 
2967   /* Graph's connected components analysis */
2968   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2969 
2970   /* print some info to stdout */
2971   if (pcbddc->dbg_flag) {
2972     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2973   }
2974 
2975   /* mark topography has done */
2976   pcbddc->recompute_topography = PETSC_FALSE;
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 #undef __FUNCT__
2981 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2982 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2983 {
2984   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2985   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2986   PetscErrorCode ierr;
2987 
2988   PetscFunctionBegin;
2989   n = 0;
2990   vertices = 0;
2991   if (pcbddc->ConstraintMatrix) {
2992     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2993     for (i=0;i<local_primal_size;i++) {
2994       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2995       if (size_of_constraint == 1) n++;
2996       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2997     }
2998     if (vertices_idx) {
2999       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3000       n = 0;
3001       for (i=0;i<local_primal_size;i++) {
3002         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3003         if (size_of_constraint == 1) {
3004           vertices[n++]=row_cmat_indices[0];
3005         }
3006         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3007       }
3008     }
3009   }
3010   *n_vertices = n;
3011   if (vertices_idx) *vertices_idx = vertices;
3012   PetscFunctionReturn(0);
3013 }
3014 
3015 #undef __FUNCT__
3016 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3017 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3018 {
3019   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3020   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3021   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3022   PetscBT        touched;
3023   PetscErrorCode ierr;
3024 
3025     /* This function assumes that the number of local constraints per connected component
3026        is not greater than the number of nodes defined for the connected component
3027        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3028   PetscFunctionBegin;
3029   n = 0;
3030   constraints_index = 0;
3031   if (pcbddc->ConstraintMatrix) {
3032     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3033     max_size_of_constraint = 0;
3034     for (i=0;i<local_primal_size;i++) {
3035       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3036       if (size_of_constraint > 1) {
3037         n++;
3038       }
3039       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3040       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3041     }
3042     if (constraints_idx) {
3043       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3044       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3045       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3046       n = 0;
3047       for (i=0;i<local_primal_size;i++) {
3048         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3049         if (size_of_constraint > 1) {
3050           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3051           /* find first untouched local node */
3052           j = 0;
3053           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3054           min_index = row_cmat_global_indices[j];
3055           min_loc = j;
3056           /* search the minimum among nodes not yet touched on the connected component
3057              since there can be more than one constraint on a single cc */
3058           for (j=1;j<size_of_constraint;j++) {
3059             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3060               min_index = row_cmat_global_indices[j];
3061               min_loc = j;
3062             }
3063           }
3064           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3065           constraints_index[n++] = row_cmat_indices[min_loc];
3066         }
3067         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3068       }
3069       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3070       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3071     }
3072   }
3073   *n_constraints = n;
3074   if (constraints_idx) *constraints_idx = constraints_index;
3075   PetscFunctionReturn(0);
3076 }
3077 
3078 #undef __FUNCT__
3079 #define __FUNCT__ "PCBDDCSubsetNumbering"
3080 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3081 {
3082   Vec            local_vec,global_vec;
3083   IS             seqis,paris;
3084   VecScatter     scatter_ctx;
3085   PetscScalar    *array;
3086   PetscInt       *temp_global_dofs;
3087   PetscScalar    globalsum;
3088   PetscInt       i,j,s;
3089   PetscInt       nlocals,first_index,old_index,max_local;
3090   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3091   PetscMPIInt    *dof_sizes,*dof_displs;
3092   PetscBool      first_found;
3093   PetscErrorCode ierr;
3094 
3095   PetscFunctionBegin;
3096   /* mpi buffers */
3097   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3098   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3099   j = ( !rank_prec_comm ? size_prec_comm : 0);
3100   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3101   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3102   /* get maximum size of subset */
3103   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3104   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3105   max_local = 0;
3106   for (i=0;i<n_local_dofs;i++) {
3107     if (max_local < temp_global_dofs[i] ) {
3108       max_local = temp_global_dofs[i];
3109     }
3110   }
3111   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3112   max_global++;
3113   max_local = 0;
3114   for (i=0;i<n_local_dofs;i++) {
3115     if (max_local < local_dofs[i] ) {
3116       max_local = local_dofs[i];
3117     }
3118   }
3119   max_local++;
3120   /* allocate workspace */
3121   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3122   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3123   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3124   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3125   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3126   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3127   /* create scatter */
3128   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3129   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3130   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3131   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3132   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3133   /* init array */
3134   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3135   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3136   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3137   if (local_dofs_mult) {
3138     for (i=0;i<n_local_dofs;i++) {
3139       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3140     }
3141   } else {
3142     for (i=0;i<n_local_dofs;i++) {
3143       array[local_dofs[i]]=1.0;
3144     }
3145   }
3146   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3147   /* scatter into global vec and get total number of global dofs */
3148   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3149   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3150   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3151   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3152   /* Fill global_vec with cumulative function for global numbering */
3153   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3154   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3155   nlocals = 0;
3156   first_index = -1;
3157   first_found = PETSC_FALSE;
3158   for (i=0;i<s;i++) {
3159     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3160       first_found = PETSC_TRUE;
3161       first_index = i;
3162     }
3163     nlocals += (PetscInt)PetscRealPart(array[i]);
3164   }
3165   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3166   if (!rank_prec_comm) {
3167     dof_displs[0]=0;
3168     for (i=1;i<size_prec_comm;i++) {
3169       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3170     }
3171   }
3172   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3173   if (first_found) {
3174     array[first_index] += (PetscScalar)nlocals;
3175     old_index = first_index;
3176     for (i=first_index+1;i<s;i++) {
3177       if (PetscRealPart(array[i]) > 0.1) {
3178         array[i] += array[old_index];
3179         old_index = i;
3180       }
3181     }
3182   }
3183   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3184   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3185   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3186   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3187   /* get global ordering of local dofs */
3188   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3189   if (local_dofs_mult) {
3190     for (i=0;i<n_local_dofs;i++) {
3191       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3192     }
3193   } else {
3194     for (i=0;i<n_local_dofs;i++) {
3195       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3196     }
3197   }
3198   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3199   /* free workspace */
3200   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3201   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3202   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3203   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3204   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3205   /* return pointer to global ordering of local dofs */
3206   *global_numbering_subset = temp_global_dofs;
3207   PetscFunctionReturn(0);
3208 }
3209 
3210 #undef __FUNCT__
3211 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3212 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3213 {
3214   PetscInt       i,j;
3215   PetscScalar    *alphas;
3216   PetscErrorCode ierr;
3217 
3218   PetscFunctionBegin;
3219   /* this implements stabilized Gram-Schmidt */
3220   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3221   for (i=0;i<n;i++) {
3222     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3223     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3224     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3225   }
3226   ierr = PetscFree(alphas);CHKERRQ(ierr);
3227   PetscFunctionReturn(0);
3228 }
3229 
3230 #undef __FUNCT__
3231 #define __FUNCT__ "MatISGetSubassemblingPattern"
3232 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3233 {
3234   Mat             subdomain_adj;
3235   IS              new_ranks,ranks_send_to;
3236   MatPartitioning partitioner;
3237   Mat_IS          *matis;
3238   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3239   PetscInt        prank;
3240   PetscMPIInt     size,rank,color;
3241   PetscInt        *xadj,*adjncy,*oldranks;
3242   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3243   PetscInt        i,local_size,threshold=0;
3244   PetscErrorCode  ierr;
3245   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3246   PetscSubcomm    subcomm;
3247 
3248   PetscFunctionBegin;
3249   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3250   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3251   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3252 
3253   /* Get info on mapping */
3254   matis = (Mat_IS*)(mat->data);
3255   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3256   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3257 
3258   /* build local CSR graph of subdomains' connectivity */
3259   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3260   xadj[0] = 0;
3261   xadj[1] = PetscMax(n_neighs-1,0);
3262   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3263   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3264 
3265   if (threshold) {
3266     PetscInt xadj_count = 0;
3267     for (i=1;i<n_neighs;i++) {
3268       if (n_shared[i] > threshold) {
3269         adjncy[xadj_count] = neighs[i];
3270         adjncy_wgt[xadj_count] = n_shared[i];
3271         xadj_count++;
3272       }
3273     }
3274     xadj[1] = xadj_count;
3275   } else {
3276     if (xadj[1]) {
3277       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3278       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3279     }
3280   }
3281   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3282   if (use_square) {
3283     for (i=0;i<xadj[1];i++) {
3284       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3285     }
3286   }
3287   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3288 
3289   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3290 
3291   /*
3292     Restrict work on active processes only.
3293   */
3294   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3295   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3296   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3297   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3298   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3299   if (color) {
3300     ierr = PetscFree(xadj);CHKERRQ(ierr);
3301     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3302     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3303   } else {
3304     PetscInt coarsening_ratio;
3305     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3306     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3307     prank = rank;
3308     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3309     /*
3310     for (i=0;i<size;i++) {
3311       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3312     }
3313     */
3314     for (i=0;i<xadj[1];i++) {
3315       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3316     }
3317     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3318     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3319     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3320 
3321     /* Partition */
3322     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3323     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3324     if (use_vwgt) {
3325       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3326       v_wgt[0] = local_size;
3327       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3328     }
3329     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3330     coarsening_ratio = size/n_subdomains;
3331     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3332     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3333     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3334     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3335 
3336     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3337     if (contiguous) {
3338       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3339     } else {
3340       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3341     }
3342     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3343     /* clean up */
3344     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3345     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3346     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3347     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3348   }
3349   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3350 
3351   /* assemble parallel IS for sends */
3352   i = 1;
3353   if (color) i=0;
3354   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3355 
3356   /* get back IS */
3357   *is_sends = ranks_send_to;
3358   PetscFunctionReturn(0);
3359 }
3360 
3361 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3362 
3363 #undef __FUNCT__
3364 #define __FUNCT__ "MatISSubassemble"
3365 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3366 {
3367   Mat                    local_mat;
3368   Mat_IS                 *matis;
3369   IS                     is_sends_internal;
3370   PetscInt               rows,cols,new_local_rows;
3371   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3372   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3373   ISLocalToGlobalMapping l2gmap;
3374   PetscInt*              l2gmap_indices;
3375   const PetscInt*        is_indices;
3376   MatType                new_local_type;
3377   /* buffers */
3378   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3379   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3380   PetscInt               *recv_buffer_idxs_local;
3381   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3382   /* MPI */
3383   MPI_Comm               comm,comm_n;
3384   PetscSubcomm           subcomm;
3385   PetscMPIInt            n_sends,n_recvs,commsize;
3386   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3387   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3388   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3389   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3390   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3391   PetscErrorCode         ierr;
3392 
3393   PetscFunctionBegin;
3394   /* TODO: add missing checks */
3395   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3396   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3397   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3398   PetscValidLogicalCollectiveInt(mat,nis,7);
3399   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3400   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3401   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3402   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3403   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3404   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3405   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3406   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3407     PetscInt mrows,mcols,mnrows,mncols;
3408     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3409     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3410     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3411     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3412     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3413     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3414   }
3415   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3416   PetscValidLogicalCollectiveInt(mat,bs,0);
3417   /* prepare IS for sending if not provided */
3418   if (!is_sends) {
3419     PetscBool pcontig = PETSC_TRUE;
3420     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3421     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3422   } else {
3423     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3424     is_sends_internal = is_sends;
3425   }
3426 
3427   /* get pointer of MATIS data */
3428   matis = (Mat_IS*)mat->data;
3429 
3430   /* get comm */
3431   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3432 
3433   /* compute number of sends */
3434   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3435   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3436 
3437   /* compute number of receives */
3438   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3439   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3440   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3441   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3442   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3443   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3444   ierr = PetscFree(iflags);CHKERRQ(ierr);
3445 
3446   /* restrict comm if requested */
3447   subcomm = 0;
3448   destroy_mat = PETSC_FALSE;
3449   if (restrict_comm) {
3450     PetscMPIInt color,subcommsize;
3451 
3452     color = 0;
3453     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3454     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3455     subcommsize = commsize - subcommsize;
3456     /* check if reuse has been requested */
3457     if (reuse == MAT_REUSE_MATRIX) {
3458       if (*mat_n) {
3459         PetscMPIInt subcommsize2;
3460         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3461         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3462         comm_n = PetscObjectComm((PetscObject)*mat_n);
3463       } else {
3464         comm_n = PETSC_COMM_SELF;
3465       }
3466     } else { /* MAT_INITIAL_MATRIX */
3467       PetscMPIInt rank;
3468 
3469       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3470       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3471       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3472       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3473       comm_n = PetscSubcommChild(subcomm);
3474     }
3475     /* flag to destroy *mat_n if not significative */
3476     if (color) destroy_mat = PETSC_TRUE;
3477   } else {
3478     comm_n = comm;
3479   }
3480 
3481   /* prepare send/receive buffers */
3482   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3483   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3484   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3485   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3486   if (nis) {
3487     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3488   }
3489 
3490   /* Get data from local matrices */
3491   if (!isdense) {
3492     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3493     /* TODO: See below some guidelines on how to prepare the local buffers */
3494     /*
3495        send_buffer_vals should contain the raw values of the local matrix
3496        send_buffer_idxs should contain:
3497        - MatType_PRIVATE type
3498        - PetscInt        size_of_l2gmap
3499        - PetscInt        global_row_indices[size_of_l2gmap]
3500        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3501     */
3502   } else {
3503     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3504     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3505     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3506     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3507     send_buffer_idxs[1] = i;
3508     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3509     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3510     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3511     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3512     for (i=0;i<n_sends;i++) {
3513       ilengths_vals[is_indices[i]] = len*len;
3514       ilengths_idxs[is_indices[i]] = len+2;
3515     }
3516   }
3517   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3518   /* additional is (if any) */
3519   if (nis) {
3520     PetscMPIInt psum;
3521     PetscInt j;
3522     for (j=0,psum=0;j<nis;j++) {
3523       PetscInt plen;
3524       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3525       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3526       psum += len+1; /* indices + lenght */
3527     }
3528     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3529     for (j=0,psum=0;j<nis;j++) {
3530       PetscInt plen;
3531       const PetscInt *is_array_idxs;
3532       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3533       send_buffer_idxs_is[psum] = plen;
3534       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3535       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3536       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3537       psum += plen+1; /* indices + lenght */
3538     }
3539     for (i=0;i<n_sends;i++) {
3540       ilengths_idxs_is[is_indices[i]] = psum;
3541     }
3542     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3543   }
3544 
3545   buf_size_idxs = 0;
3546   buf_size_vals = 0;
3547   buf_size_idxs_is = 0;
3548   for (i=0;i<n_recvs;i++) {
3549     buf_size_idxs += (PetscInt)olengths_idxs[i];
3550     buf_size_vals += (PetscInt)olengths_vals[i];
3551     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3552   }
3553   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3554   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3555   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3556 
3557   /* get new tags for clean communications */
3558   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3559   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3560   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3561 
3562   /* allocate for requests */
3563   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3564   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3565   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3566   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3567   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3568   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3569 
3570   /* communications */
3571   ptr_idxs = recv_buffer_idxs;
3572   ptr_vals = recv_buffer_vals;
3573   ptr_idxs_is = recv_buffer_idxs_is;
3574   for (i=0;i<n_recvs;i++) {
3575     source_dest = onodes[i];
3576     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3577     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3578     ptr_idxs += olengths_idxs[i];
3579     ptr_vals += olengths_vals[i];
3580     if (nis) {
3581       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3582       ptr_idxs_is += olengths_idxs_is[i];
3583     }
3584   }
3585   for (i=0;i<n_sends;i++) {
3586     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3587     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3588     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3589     if (nis) {
3590       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3591     }
3592   }
3593   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3594   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3595 
3596   /* assemble new l2g map */
3597   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3598   ptr_idxs = recv_buffer_idxs;
3599   new_local_rows = 0;
3600   for (i=0;i<n_recvs;i++) {
3601     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3602     ptr_idxs += olengths_idxs[i];
3603   }
3604   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3605   ptr_idxs = recv_buffer_idxs;
3606   new_local_rows = 0;
3607   for (i=0;i<n_recvs;i++) {
3608     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3609     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3610     ptr_idxs += olengths_idxs[i];
3611   }
3612   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3613   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3614   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3615 
3616   /* infer new local matrix type from received local matrices type */
3617   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3618   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3619   if (n_recvs) {
3620     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3621     ptr_idxs = recv_buffer_idxs;
3622     for (i=0;i<n_recvs;i++) {
3623       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3624         new_local_type_private = MATAIJ_PRIVATE;
3625         break;
3626       }
3627       ptr_idxs += olengths_idxs[i];
3628     }
3629     switch (new_local_type_private) {
3630       case MATDENSE_PRIVATE:
3631         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3632           new_local_type = MATSEQAIJ;
3633           bs = 1;
3634         } else { /* if I receive only 1 dense matrix */
3635           new_local_type = MATSEQDENSE;
3636           bs = 1;
3637         }
3638         break;
3639       case MATAIJ_PRIVATE:
3640         new_local_type = MATSEQAIJ;
3641         bs = 1;
3642         break;
3643       case MATBAIJ_PRIVATE:
3644         new_local_type = MATSEQBAIJ;
3645         break;
3646       case MATSBAIJ_PRIVATE:
3647         new_local_type = MATSEQSBAIJ;
3648         break;
3649       default:
3650         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3651         break;
3652     }
3653   } else { /* by default, new_local_type is seqdense */
3654     new_local_type = MATSEQDENSE;
3655     bs = 1;
3656   }
3657 
3658   /* create MATIS object if needed */
3659   if (reuse == MAT_INITIAL_MATRIX) {
3660     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3661     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3662   } else {
3663     /* it also destroys the local matrices */
3664     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3665   }
3666   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3667   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3668 
3669   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3670 
3671   /* Global to local map of received indices */
3672   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3673   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3674   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3675 
3676   /* restore attributes -> type of incoming data and its size */
3677   buf_size_idxs = 0;
3678   for (i=0;i<n_recvs;i++) {
3679     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3680     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3681     buf_size_idxs += (PetscInt)olengths_idxs[i];
3682   }
3683   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3684 
3685   /* set preallocation */
3686   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3687   if (!newisdense) {
3688     PetscInt *new_local_nnz=0;
3689 
3690     ptr_vals = recv_buffer_vals;
3691     ptr_idxs = recv_buffer_idxs_local;
3692     if (n_recvs) {
3693       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3694     }
3695     for (i=0;i<n_recvs;i++) {
3696       PetscInt j;
3697       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3698         for (j=0;j<*(ptr_idxs+1);j++) {
3699           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3700         }
3701       } else {
3702         /* TODO */
3703       }
3704       ptr_idxs += olengths_idxs[i];
3705     }
3706     if (new_local_nnz) {
3707       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3708       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3709       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3710       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3711       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3712       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3713     } else {
3714       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3715     }
3716     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3717   } else {
3718     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3719   }
3720 
3721   /* set values */
3722   ptr_vals = recv_buffer_vals;
3723   ptr_idxs = recv_buffer_idxs_local;
3724   for (i=0;i<n_recvs;i++) {
3725     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3726       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3727       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3728       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3729       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3730       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3731     } else {
3732       /* TODO */
3733     }
3734     ptr_idxs += olengths_idxs[i];
3735     ptr_vals += olengths_vals[i];
3736   }
3737   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3738   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3739   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3740   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3741   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3742   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3743 
3744 #if 0
3745   if (!restrict_comm) { /* check */
3746     Vec       lvec,rvec;
3747     PetscReal infty_error;
3748 
3749     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3750     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3751     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3752     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3753     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3754     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3755     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3756     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3757     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3758   }
3759 #endif
3760 
3761   /* assemble new additional is (if any) */
3762   if (nis) {
3763     PetscInt **temp_idxs,*count_is,j,psum;
3764 
3765     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3766     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3767     ptr_idxs = recv_buffer_idxs_is;
3768     psum = 0;
3769     for (i=0;i<n_recvs;i++) {
3770       for (j=0;j<nis;j++) {
3771         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3772         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3773         psum += plen;
3774         ptr_idxs += plen+1; /* shift pointer to received data */
3775       }
3776     }
3777     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3778     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3779     for (i=1;i<nis;i++) {
3780       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3781     }
3782     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3783     ptr_idxs = recv_buffer_idxs_is;
3784     for (i=0;i<n_recvs;i++) {
3785       for (j=0;j<nis;j++) {
3786         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3787         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3788         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3789         ptr_idxs += plen+1; /* shift pointer to received data */
3790       }
3791     }
3792     for (i=0;i<nis;i++) {
3793       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3794       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3795       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3796     }
3797     ierr = PetscFree(count_is);CHKERRQ(ierr);
3798     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3799     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3800   }
3801   /* free workspace */
3802   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3803   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3804   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3805   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3806   if (isdense) {
3807     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3808     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3809   } else {
3810     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3811   }
3812   if (nis) {
3813     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3814     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3815   }
3816   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3817   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3818   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3819   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3820   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3821   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3822   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3823   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3824   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3825   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3826   ierr = PetscFree(onodes);CHKERRQ(ierr);
3827   if (nis) {
3828     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3829     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3830     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3831   }
3832   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3833   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3834     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3835     for (i=0;i<nis;i++) {
3836       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3837     }
3838   }
3839   PetscFunctionReturn(0);
3840 }
3841 
3842 /* temporary hack into ksp private data structure */
3843 #include <petsc-private/kspimpl.h>
3844 
3845 #undef __FUNCT__
3846 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3847 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3848 {
3849   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3850   PC_IS                  *pcis = (PC_IS*)pc->data;
3851   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3852   MatNullSpace           CoarseNullSpace=NULL;
3853   ISLocalToGlobalMapping coarse_islg;
3854   IS                     coarse_is,*isarray;
3855   PetscInt               i,im_active=-1,active_procs=-1;
3856   PetscInt               nis,nisdofs,nisneu;
3857   PC                     pc_temp;
3858   PCType                 coarse_pc_type;
3859   KSPType                coarse_ksp_type;
3860   PetscBool              multilevel_requested,multilevel_allowed;
3861   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3862   Mat                    t_coarse_mat_is;
3863   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3864   PetscMPIInt            all_procs;
3865   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3866   PetscBool              compute_vecs = PETSC_FALSE;
3867   PetscScalar            *array;
3868   PetscErrorCode         ierr;
3869 
3870   PetscFunctionBegin;
3871   /* Assign global numbering to coarse dofs */
3872   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3873     PetscInt ocoarse_size;
3874     compute_vecs = PETSC_TRUE;
3875     ocoarse_size = pcbddc->coarse_size;
3876     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3877     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3878     /* see if we can avoid some work */
3879     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3880       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3881         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3882         coarse_reuse = PETSC_FALSE;
3883       } else { /* we can safely reuse already computed coarse matrix */
3884         coarse_reuse = PETSC_TRUE;
3885       }
3886     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3887       coarse_reuse = PETSC_FALSE;
3888     }
3889     /* reset any subassembling information */
3890     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3891     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3892   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3893     coarse_reuse = PETSC_TRUE;
3894   }
3895 
3896   /* count "active" (i.e. with positive local size) and "void" processes */
3897   im_active = !!(pcis->n);
3898   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3899   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3900   void_procs = all_procs-active_procs;
3901   csin_type_simple = PETSC_TRUE;
3902   redist = PETSC_FALSE;
3903   if (pcbddc->current_level && void_procs) {
3904     csin_ml = PETSC_TRUE;
3905     ncoarse_ml = void_procs;
3906     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3907     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3908       csin_ds = PETSC_TRUE;
3909       ncoarse_ds = pcbddc->redistribute_coarse;
3910       redist = PETSC_TRUE;
3911     } else {
3912       csin_ds = PETSC_TRUE;
3913       ncoarse_ds = active_procs;
3914       redist = PETSC_TRUE;
3915     }
3916   } else {
3917     csin_ml = PETSC_FALSE;
3918     ncoarse_ml = all_procs;
3919     if (void_procs) {
3920       csin_ds = PETSC_TRUE;
3921       ncoarse_ds = void_procs;
3922       csin_type_simple = PETSC_FALSE;
3923     } else {
3924       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3925         csin_ds = PETSC_TRUE;
3926         ncoarse_ds = pcbddc->redistribute_coarse;
3927         redist = PETSC_TRUE;
3928       } else {
3929         csin_ds = PETSC_FALSE;
3930         ncoarse_ds = all_procs;
3931       }
3932     }
3933   }
3934 
3935   /*
3936     test if we can go multilevel: three conditions must be satisfied:
3937     - we have not exceeded the number of levels requested
3938     - we can actually subassemble the active processes
3939     - we can find a suitable number of MPI processes where we can place the subassembled problem
3940   */
3941   multilevel_allowed = PETSC_FALSE;
3942   multilevel_requested = PETSC_FALSE;
3943   if (pcbddc->current_level < pcbddc->max_levels) {
3944     multilevel_requested = PETSC_TRUE;
3945     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3946       multilevel_allowed = PETSC_FALSE;
3947     } else {
3948       multilevel_allowed = PETSC_TRUE;
3949     }
3950   }
3951   /* determine number of process partecipating to coarse solver */
3952   if (multilevel_allowed) {
3953     ncoarse = ncoarse_ml;
3954     csin = csin_ml;
3955     redist = PETSC_FALSE;
3956   } else {
3957     ncoarse = ncoarse_ds;
3958     csin = csin_ds;
3959   }
3960 
3961   /* creates temporary l2gmap and IS for coarse indexes */
3962   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3963   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3964 
3965   /* creates temporary MATIS object for coarse matrix */
3966   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3967   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3968   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3969   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3970 #if 0
3971   {
3972     PetscViewer viewer;
3973     char filename[256];
3974     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3975     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3976     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3977     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3978     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3979   }
3980 #endif
3981   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3982   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3983   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3984   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3985   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3986 
3987   /* compute dofs splitting and neumann boundaries for coarse dofs */
3988   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3989     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3990     const PetscInt         *idxs;
3991     ISLocalToGlobalMapping tmap;
3992 
3993     /* create map between primal indices (in local representative ordering) and local primal numbering */
3994     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3995     /* allocate space for temporary storage */
3996     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3997     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3998     /* allocate for IS array */
3999     nisdofs = pcbddc->n_ISForDofsLocal;
4000     nisneu = !!pcbddc->NeumannBoundariesLocal;
4001     nis = nisdofs + nisneu;
4002     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4003     /* dofs splitting */
4004     for (i=0;i<nisdofs;i++) {
4005       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4006       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4007       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4008       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4009       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4010       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4011       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4012       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4013     }
4014     /* neumann boundaries */
4015     if (pcbddc->NeumannBoundariesLocal) {
4016       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4017       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4018       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4019       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4020       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4021       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4022       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4023       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4024     }
4025     /* free memory */
4026     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4027     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4028     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4029   } else {
4030     nis = 0;
4031     nisdofs = 0;
4032     nisneu = 0;
4033     isarray = NULL;
4034   }
4035   /* destroy no longer needed map */
4036   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4037 
4038   /* restrict on coarse candidates (if needed) */
4039   coarse_mat_is = NULL;
4040   if (csin) {
4041     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4042       if (redist) {
4043         PetscMPIInt rank;
4044         PetscInt    spc,n_spc_p1,dest[1],destsize;
4045 
4046         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4047         spc = active_procs/ncoarse;
4048         n_spc_p1 = active_procs%ncoarse;
4049         if (im_active) {
4050           destsize = 1;
4051           if (rank > n_spc_p1*(spc+1)-1) {
4052             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4053           } else {
4054             dest[0] = rank/(spc+1);
4055           }
4056         } else {
4057           destsize = 0;
4058         }
4059         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4060       } else if (csin_type_simple) {
4061         PetscMPIInt rank;
4062         PetscInt    issize,isidx;
4063 
4064         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4065         if (im_active) {
4066           issize = 1;
4067           isidx = (PetscInt)rank;
4068         } else {
4069           issize = 0;
4070           isidx = -1;
4071         }
4072         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4073       } else { /* get a suitable subassembling pattern from MATIS code */
4074         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4075       }
4076 
4077       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4078       if (!redist || ncoarse <= void_procs) {
4079         PetscInt ncoarse_cand,tissize,*nisindices;
4080         PetscInt *coarse_candidates;
4081         const PetscInt* tisindices;
4082 
4083         /* get coarse candidates' ranks in pc communicator */
4084         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4085         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4086         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4087           if (!coarse_candidates[i]) {
4088             coarse_candidates[ncoarse_cand++]=i;
4089           }
4090         }
4091         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4092 
4093 
4094         if (pcbddc->dbg_flag) {
4095           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4096           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4097           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4098           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4099           for (i=0;i<ncoarse_cand;i++) {
4100             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4101           }
4102           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4103           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4104         }
4105         /* shift the pattern on coarse candidates */
4106         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4107         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4108         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4109         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4110         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4111         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4112         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4113       }
4114       if (pcbddc->dbg_flag) {
4115         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4116         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4117         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4118         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4119       }
4120     }
4121     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4122     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4123   } else {
4124     if (pcbddc->dbg_flag) {
4125       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4126       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4127       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4128     }
4129     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4130     coarse_mat_is = t_coarse_mat_is;
4131   }
4132 
4133   /* create local to global scatters for coarse problem */
4134   if (compute_vecs) {
4135     PetscInt lrows;
4136     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4137     if (coarse_mat_is) {
4138       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4139     } else {
4140       lrows = 0;
4141     }
4142     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4143     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4144     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4145     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4146     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4147   }
4148   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4149   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4150 
4151   /* set defaults for coarse KSP and PC */
4152   if (multilevel_allowed) {
4153     coarse_ksp_type = KSPRICHARDSON;
4154     coarse_pc_type = PCBDDC;
4155   } else {
4156     coarse_ksp_type = KSPPREONLY;
4157     coarse_pc_type = PCREDUNDANT;
4158   }
4159 
4160   /* print some info if requested */
4161   if (pcbddc->dbg_flag) {
4162     if (!multilevel_allowed) {
4163       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4164       if (multilevel_requested) {
4165         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4166       } else if (pcbddc->max_levels) {
4167         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4168       }
4169       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4170     }
4171   }
4172 
4173   /* create the coarse KSP object only once with defaults */
4174   if (coarse_mat_is) {
4175     MatReuse coarse_mat_reuse;
4176     PetscViewer dbg_viewer = NULL;
4177     if (pcbddc->dbg_flag) {
4178       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4179       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4180     }
4181     if (!pcbddc->coarse_ksp) {
4182       char prefix[256],str_level[16];
4183       size_t len;
4184       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4185       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4186       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4187       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4188       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4189       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4190       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4191       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4192       /* prefix */
4193       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4194       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4195       if (!pcbddc->current_level) {
4196         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4197         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4198       } else {
4199         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4200         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4201         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4202         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4203         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4204         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4205       }
4206       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4207       /* allow user customization */
4208       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4209       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4210     }
4211 
4212     /* get some info after set from options */
4213     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4214     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4215     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4216     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4217     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4218       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4219       isbddc = PETSC_FALSE;
4220     }
4221     if (isredundant) {
4222       KSP inner_ksp;
4223       PC inner_pc;
4224       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4225       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4226       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4227     }
4228 
4229     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4230     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4231     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4232     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4233     if (nisdofs) {
4234       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4235       for (i=0;i<nisdofs;i++) {
4236         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4237       }
4238     }
4239     if (nisneu) {
4240       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4241       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4242     }
4243 
4244     /* assemble coarse matrix */
4245     if (coarse_reuse) {
4246       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4247       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4248       coarse_mat_reuse = MAT_REUSE_MATRIX;
4249     } else {
4250       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4251     }
4252     if (isbddc || isnn) {
4253       if (pcbddc->coarsening_ratio > 1) {
4254         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4255           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4256           if (pcbddc->dbg_flag) {
4257             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4258             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4259             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4260             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4261           }
4262         }
4263         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4264       } else {
4265         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4266         coarse_mat = coarse_mat_is;
4267       }
4268     } else {
4269       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4270     }
4271     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4272 
4273     /* propagate symmetry info to coarse matrix */
4274     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4275     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4276 
4277     /* set operators */
4278     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4279     if (pcbddc->dbg_flag) {
4280       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4281     }
4282   } else { /* processes non partecipating to coarse solver (if any) */
4283     coarse_mat = 0;
4284   }
4285   ierr = PetscFree(isarray);CHKERRQ(ierr);
4286 #if 0
4287   {
4288     PetscViewer viewer;
4289     char filename[256];
4290     sprintf(filename,"coarse_mat.m");
4291     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4292     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4293     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4294     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4295   }
4296 #endif
4297 
4298   /* Compute coarse null space (special handling by BDDC only) */
4299   if (pcbddc->NullSpace) {
4300     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4301   }
4302 
4303   if (pcbddc->coarse_ksp) {
4304     Vec crhs,csol;
4305     PetscBool ispreonly;
4306     if (CoarseNullSpace) {
4307       if (isbddc) {
4308         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4309       } else {
4310         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4311       }
4312     }
4313     /* setup coarse ksp */
4314     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4315     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4316     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4317     /* hack */
4318     if (!csol) {
4319       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4320     }
4321     if (!crhs) {
4322       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4323     }
4324     /* Check coarse problem if in debug mode or if solving with an iterative method */
4325     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4326     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4327       KSP       check_ksp;
4328       KSPType   check_ksp_type;
4329       PC        check_pc;
4330       Vec       check_vec,coarse_vec;
4331       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4332       PetscInt  its;
4333       PetscBool compute_eigs;
4334       PetscReal *eigs_r,*eigs_c;
4335       PetscInt  neigs;
4336       const char *prefix;
4337 
4338       /* Create ksp object suitable for estimation of extreme eigenvalues */
4339       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4340       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4341       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4342       if (ispreonly) {
4343         check_ksp_type = KSPPREONLY;
4344         compute_eigs = PETSC_FALSE;
4345       } else {
4346         check_ksp_type = KSPGMRES;
4347         compute_eigs = PETSC_TRUE;
4348       }
4349       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4350       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4351       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4352       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4353       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4354       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4355       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4356       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4357       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4358       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4359       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4360       /* create random vec */
4361       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4362       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4363       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4364       if (CoarseNullSpace) {
4365         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4366       }
4367       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4368       /* solve coarse problem */
4369       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4370       if (CoarseNullSpace) {
4371         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4372       }
4373       /* set eigenvalue estimation if preonly has not been requested */
4374       if (compute_eigs) {
4375         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4376         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4377         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4378         lambda_max = eigs_r[neigs-1];
4379         lambda_min = eigs_r[0];
4380         if (pcbddc->use_coarse_estimates) {
4381           if (lambda_max>lambda_min) {
4382             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4383             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4384           }
4385         }
4386       }
4387 
4388       /* check coarse problem residual error */
4389       if (pcbddc->dbg_flag) {
4390         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4391         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4392         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4393         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4394         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4395         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4396         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4397         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4398         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4399         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4400         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4401         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4402         if (compute_eigs) {
4403           PetscReal lambda_max_s,lambda_min_s;
4404           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4405           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4406           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4407           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4408           for (i=0;i<neigs;i++) {
4409             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4410           }
4411         }
4412         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4413         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4414       }
4415       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4416       if (compute_eigs) {
4417         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4418         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4419       }
4420     }
4421   }
4422   /* print additional info */
4423   if (pcbddc->dbg_flag) {
4424     /* waits until all processes reaches this point */
4425     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4426     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4427     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4428   }
4429 
4430   /* free memory */
4431   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4432   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4433   PetscFunctionReturn(0);
4434 }
4435 
4436 #undef __FUNCT__
4437 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4438 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4439 {
4440   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4441   PC_IS*         pcis = (PC_IS*)pc->data;
4442   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4443   PetscInt       i,coarse_size;
4444   PetscInt       *local_primal_indices;
4445   PetscErrorCode ierr;
4446 
4447   PetscFunctionBegin;
4448   /* Compute global number of coarse dofs */
4449   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4450     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4451   }
4452   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4453 
4454   /* check numbering */
4455   if (pcbddc->dbg_flag) {
4456     PetscScalar coarsesum,*array;
4457     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4458 
4459     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4460     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4461     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4462     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4463     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4464     for (i=0;i<pcbddc->local_primal_size;i++) {
4465       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4466     }
4467     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4468     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4469     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4470     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4471     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4472     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4473     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4474     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4475     for (i=0;i<pcis->n;i++) {
4476       if (array[i] == 1.0) {
4477         set_error = PETSC_TRUE;
4478         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4479       }
4480     }
4481     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4482     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4483     for (i=0;i<pcis->n;i++) {
4484       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4485     }
4486     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4487     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4488     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4489     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4490     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4491     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4492     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4493       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4494       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4495       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4496       for (i=0;i<pcbddc->local_primal_size;i++) {
4497         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4498       }
4499       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4500     }
4501     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4502     if (set_error_reduced) {
4503       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4504     }
4505   }
4506   /* get back data */
4507   *coarse_size_n = coarse_size;
4508   *local_primal_indices_n = local_primal_indices;
4509   PetscFunctionReturn(0);
4510 }
4511 
4512 #undef __FUNCT__
4513 #define __FUNCT__ "PCBDDCGlobalToLocal"
4514 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4515 {
4516   IS             localis_t;
4517   PetscInt       i,lsize,*idxs,n;
4518   PetscScalar    *vals;
4519   PetscErrorCode ierr;
4520 
4521   PetscFunctionBegin;
4522   /* get indices in local ordering exploiting local to global map */
4523   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4524   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4525   for (i=0;i<lsize;i++) vals[i] = 1.0;
4526   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4527   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4528   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4529   if (idxs) { /* multilevel guard */
4530     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4531   }
4532   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4533   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4534   ierr = PetscFree(vals);CHKERRQ(ierr);
4535   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4536   /* now compute set in local ordering */
4537   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4538   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4539   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4540   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4541   for (i=0,lsize=0;i<n;i++) {
4542     if (PetscRealPart(vals[i]) > 0.5) {
4543       lsize++;
4544     }
4545   }
4546   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4547   for (i=0,lsize=0;i<n;i++) {
4548     if (PetscRealPart(vals[i]) > 0.5) {
4549       idxs[lsize++] = i;
4550     }
4551   }
4552   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4553   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4554   *localis = localis_t;
4555   PetscFunctionReturn(0);
4556 }
4557 
4558 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4559 #undef __FUNCT__
4560 #define __FUNCT__ "PCBDDCMatMult_Private"
4561 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4562 {
4563   PCBDDCChange_ctx change_ctx;
4564   PetscErrorCode   ierr;
4565 
4566   PetscFunctionBegin;
4567   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4568   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4569   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4570   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4571   PetscFunctionReturn(0);
4572 }
4573 
4574 #undef __FUNCT__
4575 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4576 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4577 {
4578   PCBDDCChange_ctx change_ctx;
4579   PetscErrorCode   ierr;
4580 
4581   PetscFunctionBegin;
4582   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4583   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4584   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4585   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4586   PetscFunctionReturn(0);
4587 }
4588 
4589 #undef __FUNCT__
4590 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4591 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4592 {
4593   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4594   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4595   PetscInt            *used_xadj,*used_adjncy;
4596   PetscBool           free_used_adj;
4597   PetscErrorCode      ierr;
4598 
4599   PetscFunctionBegin;
4600   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4601   free_used_adj = PETSC_FALSE;
4602   if (pcbddc->sub_schurs_layers == -1) {
4603     used_xadj = NULL;
4604     used_adjncy = NULL;
4605   } else {
4606     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4607       used_xadj = pcbddc->mat_graph->xadj;
4608       used_adjncy = pcbddc->mat_graph->adjncy;
4609     } else if (pcbddc->computed_rowadj) {
4610       used_xadj = pcbddc->mat_graph->xadj;
4611       used_adjncy = pcbddc->mat_graph->adjncy;
4612     } else {
4613       PetscBool      flg_row=PETSC_FALSE;
4614       const PetscInt *xadj,*adjncy;
4615       PetscInt       nvtxs;
4616 
4617       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4618       if (flg_row) {
4619         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4620         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4621         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4622         free_used_adj = PETSC_TRUE;
4623       } else {
4624         pcbddc->sub_schurs_layers = -1;
4625         used_xadj = NULL;
4626         used_adjncy = NULL;
4627       }
4628       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4629     }
4630   }
4631   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4632 
4633   /* free adjacency */
4634   if (free_used_adj) {
4635     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4636   }
4637   PetscFunctionReturn(0);
4638 }
4639 
4640 #undef __FUNCT__
4641 #define __FUNCT__ "PCBDDCInitSubSchurs"
4642 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4643 {
4644   PC_IS               *pcis=(PC_IS*)pc->data;
4645   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4646   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4647   PCBDDCGraph         graph;
4648   Mat                 S_j;
4649   PetscErrorCode      ierr;
4650 
4651   PetscFunctionBegin;
4652   /* attach interface graph for determining subsets */
4653   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4654     IS verticesIS;
4655 
4656     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4657     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4658     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4659     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4660     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4661     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4662 /*
4663     if (pcbddc->dbg_flag) {
4664       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4665     }
4666 */
4667   } else {
4668     graph = pcbddc->mat_graph;
4669   }
4670 
4671   /* Create Schur complement matrix */
4672   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4673   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4674 
4675   /* sub_schurs init */
4676   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4677   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4678   /* free graph struct */
4679   if (pcbddc->sub_schurs_rebuild) {
4680     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4681   }
4682   PetscFunctionReturn(0);
4683 }
4684