xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision cf5a6209970185a07c62c3d51231f232aaad6491)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31   if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
32     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
33   }
34 
35   /* max size of subsets */
36   mss = 0;
37   for (i=0;i<sub_schurs->n_subs;i++) {
38     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
39       PetscInt subset_size;
40       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
41       mss = PetscMax(mss,subset_size);
42     }
43   }
44 
45   /* min/max and threshold */
46   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
47   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
48   nmax = PetscMax(nmin,nmax);
49   if (pcbddc->adaptive_threshold > 1.0) {
50     thresh = 1.0/pcbddc->adaptive_threshold;
51   } else {
52     thresh = 1.0;
53   }
54 
55   /* allocate lapack workspace */
56   cum = cum2 = 0;
57   maxneigs = 0;
58   for (i=0;i<sub_schurs->n_subs;i++) {
59     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
60       PetscInt n,subset_size;
61       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
62       n = PetscMin(subset_size,nmax);
63       cum += subset_size*n;
64       cum2 += n;
65       maxneigs = PetscMax(maxneigs,n);
66     }
67   }
68 
69   if (mss) {
70     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
71       PetscBLASInt B_itype = 1;
72       PetscBLASInt B_N = mss;
73       PetscScalar  zero = 0.0;
74       PetscScalar  eps = 0.0; /* dlamch? */
75 
76       B_lwork = -1;
77       S = NULL;
78       St = NULL;
79       eigs = NULL;
80       eigv = NULL;
81       B_iwork = NULL;
82       B_ifail = NULL;
83       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
84 #if defined(PETSC_USE_COMPLEX)
85       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
86 #else
87       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
88 #endif
89       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
90       ierr = PetscFPTrapPop();CHKERRQ(ierr);
91     } else {
92         /* TODO */
93     }
94   } else {
95     lwork = 0;
96   }
97 
98   nv = 0;
99   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
100     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
101   }
102   ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr);
103   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
104                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
105 #if defined(PETSC_USE_COMPLEX)
106   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
107 #endif
108   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
109   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
110                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
111                       nv+cum,&pcbddc->adaptive_constraints_idxs,
112                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
113   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
114 
115   maxneigs = 0;
116   cum = cum2 = cumarray = 0;
117   if (sub_schurs->is_Ej_com) {
118     const PetscInt *idxs;
119 
120     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
121     for (cum=0;cum<nv;cum++) {
122       pcbddc->adaptive_constraints_n[cum] = 1;
123       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
124       pcbddc->adaptive_constraints_ptrs[cum] = cum;
125       pcbddc->adaptive_constraints_data[cum] = 1.0;
126     }
127     cum2 = cum;
128     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
129   }
130 
131   if (mss) { /* multilevel */
132     if (pcbddc->use_deluxe_scaling) {
133       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
134     } else {
135       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
136     }
137     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
138   }
139 
140   for (i=0;i<sub_schurs->n_subs;i++) {
141     PetscInt j,subset_size;
142 
143     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
144     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
145       const PetscInt *idxs;
146       PetscScalar    one = 1.0,zero=0.0;
147       PetscBLASInt   B_N;
148 
149       /* S should be copied since we need it for deluxe scaling */
150       if (sub_schurs->is_hermitian) {
151         PetscInt j;
152         for (j=0;j<subset_size;j++) {
153           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
154         }
155         for (j=0;j<subset_size;j++) {
156           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
157         }
158       } else {
159         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
160         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
161       }
162       /* is always this the right matrix? */
163       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
164 
165       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
166       /* St = Starray+cumarray; */
167 
168       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
169         PetscBLASInt B_itype = 1;
170         PetscBLASInt B_IL = 1, B_IU;
171         PetscScalar  eps = -1.0; /* dlamch? */
172         PetscInt     nmin_s;
173 
174         /* ask for eigenvalues lower than thresh */
175         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
176         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
177         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
178 #if defined(PETSC_USE_COMPLEX)
179         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
180 #else
181         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
182 #endif
183         ierr = PetscFPTrapPop();CHKERRQ(ierr);
184         if (B_ierr) {
185           if (B_ierr < 0 ) {
186             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
187           } else if (B_ierr <= B_N) {
188             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
189           } else {
190             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
191           }
192         }
193 
194         if (B_neigs > nmax) {
195           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax);
196           B_neigs = nmax;
197         }
198 
199         nmin_s = PetscMin(nmin,B_N);
200         if (B_neigs < nmin_s) {
201           PetscBLASInt B_neigs2;
202 
203           B_IL = B_neigs + 1;
204           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
205           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
206           if (sub_schurs->is_hermitian) {
207             PetscInt j;
208             for (j=0;j<subset_size;j++) {
209               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
210             }
211             for (j=0;j<subset_size;j++) {
212               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
213             }
214           } else {
215             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
216             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
217           }
218           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
219 #if defined(PETSC_USE_COMPLEX)
220           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
221 #else
222           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
223 #endif
224           ierr = PetscFPTrapPop();CHKERRQ(ierr);
225           B_neigs += B_neigs2;
226         }
227         if (B_ierr) {
228           if (B_ierr < 0 ) {
229             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
230           } else if (B_ierr <= B_N) {
231             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
232           } else {
233             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
234           }
235         }
236         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
237         for (j=0;j<B_neigs;j++) {
238           if (eigs[j] == 0.0) {
239             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
240           } else {
241             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
242           }
243         }
244       } else {
245           /* TODO */
246       }
247       maxneigs = PetscMax(B_neigs,maxneigs);
248       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
249 
250       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
251       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N));
252       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
253 #if 0
254       PetscInt ii;
255       for (ii=0;ii<B_neigs;ii++) {
256         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N);
257         for (j=0;j<B_N;j++) {
258           PetscPrintf(PETSC_COMM_SELF,"[%d]       %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]);
259         }
260       }
261 #endif
262       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
263       for (j=0;j<B_neigs;j++) {
264 #if 0
265         {
266           PetscBLASInt Blas_N,Blas_one = 1.0;
267           PetscScalar norm;
268           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
269           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
270           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
271             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
272           } else {
273             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
274           }
275           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
276         }
277 #endif
278         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
279         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
280         cum2 += subset_size;
281       }
282       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
283     }
284     /* shift for next computation */
285     cumarray += subset_size*subset_size;
286   }
287   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
288   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
289 
290   if (mss) {
291     if (pcbddc->use_deluxe_scaling) {
292       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
293     } else {
294       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
295     }
296     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
297   }
298   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
299 #if defined(PETSC_USE_COMPLEX)
300   ierr = PetscFree(rwork);CHKERRQ(ierr);
301 #endif
302   if (pcbddc->dbg_flag) {
303     PetscInt maxneigs_r;
304     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
305     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
306   }
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "PCBDDCSetUpSolvers"
312 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
313 {
314   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
315   PetscScalar    *coarse_submat_vals;
316   PetscErrorCode ierr;
317 
318   PetscFunctionBegin;
319   /* Setup local scatters R_to_B and (optionally) R_to_D */
320   /* PCBDDCSetUpLocalWorkVectors should be called first! */
321   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
322 
323   /* Setup local neumann solver ksp_R */
324   /* PCBDDCSetUpLocalScatters should be called first! */
325   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
326 
327   /* Change global null space passed in by the user if change of basis has been requested */
328   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
329     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
330   }
331 
332   /*
333      Setup local correction and local part of coarse basis.
334      Gives back the dense local part of the coarse matrix in column major ordering
335   */
336   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
337 
338   /* Compute total number of coarse nodes and setup coarse solver */
339   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
340 
341   /* free */
342   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 #undef __FUNCT__
347 #define __FUNCT__ "PCBDDCResetCustomization"
348 PetscErrorCode PCBDDCResetCustomization(PC pc)
349 {
350   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
351   PetscErrorCode ierr;
352 
353   PetscFunctionBegin;
354   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
355   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
356   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
357   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
358   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
359   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
360   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
361   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
362   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
363   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
364   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
365   PetscFunctionReturn(0);
366 }
367 
368 #undef __FUNCT__
369 #define __FUNCT__ "PCBDDCResetTopography"
370 PetscErrorCode PCBDDCResetTopography(PC pc)
371 {
372   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
373   PetscErrorCode ierr;
374 
375   PetscFunctionBegin;
376   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
377   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
378   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
379   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
380   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
381   PetscFunctionReturn(0);
382 }
383 
384 #undef __FUNCT__
385 #define __FUNCT__ "PCBDDCResetSolvers"
386 PetscErrorCode PCBDDCResetSolvers(PC pc)
387 {
388   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
389   PetscScalar    *array;
390   PetscErrorCode ierr;
391 
392   PetscFunctionBegin;
393   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
394   if (pcbddc->coarse_phi_B) {
395     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
396     ierr = PetscFree(array);CHKERRQ(ierr);
397   }
398   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
399   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
400   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
402   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
403   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
404   if (pcbddc->local_auxmat2) {
405     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
406     ierr = PetscFree(array);CHKERRQ(ierr);
407   }
408   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
409   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
410   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
411   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
412   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
413   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
414   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
415   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
416   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
417   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
418   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
419   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
420   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
421   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
422   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
423   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
429 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
430 {
431   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
432   PC_IS          *pcis = (PC_IS*)pc->data;
433   VecType        impVecType;
434   PetscInt       n_constraints,n_R,old_size;
435   PetscErrorCode ierr;
436 
437   PetscFunctionBegin;
438   if (!pcbddc->ConstraintMatrix) {
439     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
440   }
441   /* get sizes */
442   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
443   n_R = pcis->n-pcbddc->n_actual_vertices;
444   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
445   /* local work vectors (try to avoid unneeded work)*/
446   /* R nodes */
447   old_size = -1;
448   if (pcbddc->vec1_R) {
449     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
450   }
451   if (n_R != old_size) {
452     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
453     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
454     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
455     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
456     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
457     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
458   }
459   /* local primal dofs */
460   old_size = -1;
461   if (pcbddc->vec1_P) {
462     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
463   }
464   if (pcbddc->local_primal_size != old_size) {
465     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
466     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
467     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
468     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
469   }
470   /* local explicit constraints */
471   old_size = -1;
472   if (pcbddc->vec1_C) {
473     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
474   }
475   if (n_constraints && n_constraints != old_size) {
476     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
477     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
478     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
479     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
480   }
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "PCBDDCSetUpCorrection"
486 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
487 {
488   PetscErrorCode         ierr;
489   /* pointers to pcis and pcbddc */
490   PC_IS*                 pcis = (PC_IS*)pc->data;
491   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
492   /* submatrices of local problem */
493   Mat                    A_RV,A_VR,A_VV;
494   /* submatrices of local coarse problem */
495   Mat                    S_VV,S_CV,S_VC,S_CC;
496   /* working matrices */
497   Mat                    C_CR;
498   /* additional working stuff */
499   PC                     pc_R;
500   Mat                    F;
501   PetscBool              isLU,isCHOL,isILU;
502 
503   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
504   PetscScalar            *work;
505   PetscInt               *idx_V_B;
506   PetscInt               n,n_vertices,n_constraints;
507   PetscInt               i,n_R,n_D,n_B;
508   PetscBool              unsymmetric_check;
509   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
510   MatType                impMatType;
511   /* some shortcuts to scalars */
512   PetscScalar            one=1.0,m_one=-1.0;
513 
514   PetscFunctionBegin;
515   /* get number of vertices (corners plus constraints with change of basis)
516      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
517   n_vertices = pcbddc->n_actual_vertices;
518   n_constraints = pcbddc->local_primal_size-n_vertices;
519   /* Set Non-overlapping dimensions */
520   n_B = pcis->n_B; n_D = pcis->n - n_B;
521   n_R = pcis->n-n_vertices;
522 
523   /* Set types for local objects needed by BDDC precondtioner */
524   impMatType = MATSEQDENSE;
525 
526   /* vertices in boundary numbering */
527   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
528   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
529   if (i != n_vertices) {
530     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
531   }
532 
533   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
534   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
535   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
536   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
537   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
538   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
539   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
540   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
541   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
542   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
543 
544   unsymmetric_check = PETSC_FALSE;
545   /* allocate workspace */
546   n = 0;
547   if (n_constraints) {
548     n += n_R*n_constraints;
549   }
550   if (n_vertices) {
551     n = PetscMax(2*n_R*n_vertices,n);
552   }
553   if (!pcbddc->issym) {
554     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
555     unsymmetric_check = PETSC_TRUE;
556   }
557   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
558 
559   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
560   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
561   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
562   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
563   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
564   if (isLU || isILU || isCHOL) {
565     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
566   } else {
567     F = NULL;
568   }
569 
570   /* Precompute stuffs needed for preprocessing and application of BDDC*/
571   if (n_constraints) {
572     Mat M1,M2,M3;
573     IS  is_aux;
574     /* see if we can save some allocations */
575     if (pcbddc->local_auxmat2) {
576       PetscInt on_R,on_constraints;
577       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
578       if (on_R != n_R || on_constraints != n_constraints) {
579         PetscScalar *marray;
580 
581         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
582         ierr = PetscFree(marray);CHKERRQ(ierr);
583         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
584         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
585       }
586     }
587     /* auxiliary matrices */
588     if (!pcbddc->local_auxmat2) {
589       PetscScalar *marray;
590 
591       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
592       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
593       marray += n_R*n_constraints;
594       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
595     }
596 
597     /* Extract constraints on R nodes: C_{CR}  */
598     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
599     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
600     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
601 
602     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
603     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
604     for (i=0;i<n_constraints;i++) {
605       const PetscScalar *row_cmat_values;
606       const PetscInt    *row_cmat_indices;
607       PetscInt          size_of_constraint,j;
608 
609       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
610       for (j=0;j<size_of_constraint;j++) {
611         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
612       }
613       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
614     }
615     if (F) {
616       Mat B;
617 
618       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
619       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
620       ierr = MatDestroy(&B);CHKERRQ(ierr);
621     } else {
622       PetscScalar *xarray;
623       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
624       for (i=0;i<n_constraints;i++) {
625         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
626         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
627         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
628         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
629         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
630       }
631       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
632     }
633 
634     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
635     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
636     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
637     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
638     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
639     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
640     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
641     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
642     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
643     ierr = MatDestroy(&M2);CHKERRQ(ierr);
644     ierr = MatDestroy(&M3);CHKERRQ(ierr);
645     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
646     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
647     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
648     ierr = MatDestroy(&M1);CHKERRQ(ierr);
649   }
650   /* Get submatrices from subdomain matrix */
651   if (n_vertices) {
652     Mat       newmat;
653     IS        is_aux;
654     PetscInt  ibs,mbs;
655     PetscBool issbaij;
656 
657     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
658     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
659     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
660     if (ibs != mbs) { /* need to convert to SEQAIJ */
661       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
662       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
663       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
664       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
665       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
666     } else {
667       /* this is safe */
668       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
669       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
670       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
671         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
672         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
673         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
674         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
675         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
676       } else {
677         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
678         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
679       }
680     }
681     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
682   }
683 
684   /* Matrix of coarse basis functions (local) */
685   if (pcbddc->coarse_phi_B) {
686     PetscInt on_B,on_primal,on_D=n_D;
687     if (pcbddc->coarse_phi_D) {
688       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
689     }
690     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
691     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
692       PetscScalar *marray;
693 
694       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
695       ierr = PetscFree(marray);CHKERRQ(ierr);
696       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
697       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
698       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
699       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
700     }
701   }
702 
703   if (!pcbddc->coarse_phi_B) {
704     PetscScalar *marray;
705 
706     n = n_B*pcbddc->local_primal_size;
707     if (pcbddc->switch_static || pcbddc->dbg_flag) {
708       n += n_D*pcbddc->local_primal_size;
709     }
710     if (!pcbddc->issym) {
711       n *= 2;
712     }
713     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
714     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
715     n = n_B*pcbddc->local_primal_size;
716     if (pcbddc->switch_static || pcbddc->dbg_flag) {
717       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
718       n += n_D*pcbddc->local_primal_size;
719     }
720     if (!pcbddc->issym) {
721       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
722       if (pcbddc->switch_static || pcbddc->dbg_flag) {
723         n = n_B*pcbddc->local_primal_size;
724         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
725       }
726     } else {
727       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
728       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
729       if (pcbddc->switch_static || pcbddc->dbg_flag) {
730         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
731         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
732       }
733     }
734   }
735   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
736   /* vertices */
737   if (n_vertices) {
738 
739     if (n_R) {
740       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
741       PetscBLASInt B_N,B_one = 1;
742       PetscScalar  *x,*y;
743 
744       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
746       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
747       if (F) {
748         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
749       } else {
750         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
751         for (i=0;i<n_vertices;i++) {
752           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
753           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
754           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
755           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
756           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
757         }
758         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
759       }
760       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
761       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
762       if (n_constraints) {
763         Mat B;
764         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
765         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
766         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
767         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
768         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
769         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
770         ierr = MatDestroy(&B);CHKERRQ(ierr);
771       }
772       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
773       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
774       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
775       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
776       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
777       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
778       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
779       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
780       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
781       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
782       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
783       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
784     } else {
785       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
786       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
787     }
788     /* coarse basis functions */
789     for (i=0;i<n_vertices;i++) {
790       PetscScalar *y;
791 
792       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
793       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
794       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
795       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
796       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
797       y[n_B*i+idx_V_B[i]] = 1.0;
798       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
799       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
800 
801       if (pcbddc->switch_static || pcbddc->dbg_flag) {
802         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
803         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
804         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
805         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
806         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
807         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
808       }
809       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
810     }
811     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
812     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
813   }
814 
815   if (n_constraints) {
816     Mat B;
817 
818     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
819     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
820     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
821     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
822     if (n_vertices) {
823       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
824     }
825     ierr = MatDestroy(&B);CHKERRQ(ierr);
826     /* coarse basis functions */
827     for (i=0;i<n_constraints;i++) {
828       PetscScalar *y;
829 
830       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
831       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
832       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
833       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
834       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
836       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
837 
838       if (pcbddc->switch_static || pcbddc->dbg_flag) {
839         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
840         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
841         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
842         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
843         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
844         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
845       }
846       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
847     }
848   }
849 
850   /* compute other basis functions for non-symmetric problems */
851   if (!pcbddc->issym) {
852     Mat B,X;
853 
854     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
855 
856     if (n_constraints) {
857       Mat S_CCT,B_C;
858 
859       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
860       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
861       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
862       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
863       if (n_vertices) {
864         Mat B_V,S_VCT;
865 
866         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
867         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
868         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
869         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
870         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
871       }
872       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
873     }
874     if (n_vertices && n_R) {
875       Mat          A_VRT;
876       PetscBLASInt B_N,B_one = 1;
877 
878       if (!n_constraints) { /* if there are no constraints, reset work */
879         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
880       }
881       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
882       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
883       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
884       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
885       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
886     }
887 
888     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
889     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
890       for (i=0;i<pcbddc->local_primal_size;i++) {
891         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
892         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
893         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
894         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
895         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
896       }
897     } else {
898       for (i=0;i<pcbddc->local_primal_size;i++) {
899         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
900         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
901         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
902         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
903         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
904       }
905     }
906     ierr = MatDestroy(&B);CHKERRQ(ierr);
907     /* coarse basis functions */
908     for (i=0;i<pcbddc->local_primal_size;i++) {
909       PetscScalar *y;
910 
911       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
912       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
913       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
914       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916       if (i<n_vertices) {
917         y[n_B*i+idx_V_B[i]] = 1.0;
918       }
919       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
920       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
921 
922       if (pcbddc->switch_static || pcbddc->dbg_flag) {
923         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
924         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
925         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
927         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
928         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
929       }
930       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
931     }
932     ierr = MatDestroy(&X);CHKERRQ(ierr);
933   }
934 
935   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
936   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
937   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
938   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
939   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
940   /* Checking coarse_sub_mat and coarse basis functios */
941   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
942   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
943   if (pcbddc->dbg_flag) {
944     Mat         coarse_sub_mat;
945     Mat         AUXMAT,TM1,TM2,TM3,TM4;
946     Mat         coarse_phi_D,coarse_phi_B;
947     Mat         coarse_psi_D,coarse_psi_B;
948     Mat         A_II,A_BB,A_IB,A_BI;
949     MatType     checkmattype=MATSEQAIJ;
950     PetscReal   real_value;
951 
952     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
953     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
954     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
955     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
956     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
957     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
958     if (unsymmetric_check) {
959       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
960       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
961     }
962     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
963 
964     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
965     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
966     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
967     if (unsymmetric_check) {
968       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
969       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
970       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
971       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
972       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
973       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
974       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
975       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
976       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
977       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
978       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
979       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
980     } else {
981       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
982       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
983       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
984       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
985       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
986       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
987       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
988       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
989     }
990     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
991     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
992     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
993     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
994     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
995     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
996 
997     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
998     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
999     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1000     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1001     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1002     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1003     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1004     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1005     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1006     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1007     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1008     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1009     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1010     if (unsymmetric_check) {
1011       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1012       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1013     }
1014     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1015   }
1016 
1017   /* free memory */
1018   ierr = PetscFree(work);CHKERRQ(ierr);
1019   if (n_vertices) {
1020     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1021   }
1022   if (n_constraints) {
1023     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1024   }
1025   /* get back data */
1026   *coarse_submat_vals_n = coarse_submat_vals;
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1032 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1033 {
1034   Mat            *work_mat;
1035   IS             isrow_s,iscol_s;
1036   PetscBool      rsorted,csorted;
1037   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1038   PetscErrorCode ierr;
1039 
1040   PetscFunctionBegin;
1041   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1042   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1043   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1044   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1045 
1046   if (!rsorted) {
1047     const PetscInt *idxs;
1048     PetscInt *idxs_sorted,i;
1049 
1050     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1051     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1052     for (i=0;i<rsize;i++) {
1053       idxs_perm_r[i] = i;
1054     }
1055     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1056     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1057     for (i=0;i<rsize;i++) {
1058       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1059     }
1060     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1061     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1062   } else {
1063     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1064     isrow_s = isrow;
1065   }
1066 
1067   if (!csorted) {
1068     if (isrow == iscol) {
1069       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1070       iscol_s = isrow_s;
1071     } else {
1072       const PetscInt *idxs;
1073       PetscInt *idxs_sorted,i;
1074 
1075       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1076       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1077       for (i=0;i<csize;i++) {
1078         idxs_perm_c[i] = i;
1079       }
1080       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1081       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1082       for (i=0;i<csize;i++) {
1083         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1084       }
1085       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1086       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1087     }
1088   } else {
1089     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1090     iscol_s = iscol;
1091   }
1092 
1093   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1094 
1095   if (!rsorted || !csorted) {
1096     Mat      new_mat;
1097     IS       is_perm_r,is_perm_c;
1098 
1099     if (!rsorted) {
1100       PetscInt *idxs_r,i;
1101       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1102       for (i=0;i<rsize;i++) {
1103         idxs_r[idxs_perm_r[i]] = i;
1104       }
1105       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1106       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1107     } else {
1108       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1109     }
1110     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1111 
1112     if (!csorted) {
1113       if (isrow_s == iscol_s) {
1114         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1115         is_perm_c = is_perm_r;
1116       } else {
1117         PetscInt *idxs_c,i;
1118         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1119         for (i=0;i<csize;i++) {
1120           idxs_c[idxs_perm_c[i]] = i;
1121         }
1122         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1123         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1124       }
1125     } else {
1126       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1127     }
1128     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1129 
1130     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1131     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1132     work_mat[0] = new_mat;
1133     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1134     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1135   }
1136 
1137   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1138   *B = work_mat[0];
1139   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1140   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1141   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1147 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1148 {
1149   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1150   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1151   Mat            new_mat;
1152   IS             is_local,is_global;
1153   PetscInt       local_size;
1154   PetscBool      isseqaij;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1159   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1160   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1161   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1162   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1163   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1164   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1165 
1166   /* check */
1167   if (pcbddc->dbg_flag) {
1168     Vec       x,x_change;
1169     PetscReal error;
1170 
1171     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1172     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1173     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1174     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1175     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1176     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1177     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1178     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1179     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1180     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1181     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1182     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1183     ierr = VecDestroy(&x);CHKERRQ(ierr);
1184     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1185   }
1186 
1187   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1188   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1189   if (isseqaij) {
1190     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1191   } else {
1192     Mat work_mat;
1193     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1194     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1195     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1196   }
1197   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1198   /*
1199   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1200   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1201   */
1202   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 #undef __FUNCT__
1207 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1208 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1209 {
1210   PC_IS*         pcis = (PC_IS*)(pc->data);
1211   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1212   IS             is_aux1,is_aux2;
1213   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1214   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1215   PetscInt       vbs,bs;
1216   PetscBT        bitmask;
1217   PetscErrorCode ierr;
1218 
1219   PetscFunctionBegin;
1220   /*
1221     No need to setup local scatters if
1222       - primal space is unchanged
1223         AND
1224       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1225         AND
1226       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1227   */
1228   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1229     PetscFunctionReturn(0);
1230   }
1231   /* destroy old objects */
1232   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1233   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1234   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1235   /* Set Non-overlapping dimensions */
1236   n_B = pcis->n_B; n_D = pcis->n - n_B;
1237   n_vertices = pcbddc->n_actual_vertices;
1238   /* create auxiliary bitmask */
1239   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1240   for (i=0;i<n_vertices;i++) {
1241     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1242   }
1243 
1244   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1245   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1246   for (i=0, n_R=0; i<pcis->n; i++) {
1247     if (!PetscBTLookup(bitmask,i)) {
1248       idx_R_local[n_R] = i;
1249       n_R++;
1250     }
1251   }
1252 
1253   /* Block code */
1254   vbs = 1;
1255   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1256   if (bs>1 && !(n_vertices%bs)) {
1257     PetscBool is_blocked = PETSC_TRUE;
1258     PetscInt  *vary;
1259     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1260     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1261     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1262     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1263     for (i=0; i<n_vertices; i++) {
1264       if (vary[i]!=0 && vary[i]!=bs) {
1265         is_blocked = PETSC_FALSE;
1266         break;
1267       }
1268     }
1269     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1270       vbs = bs;
1271       for (i=0;i<n_R/vbs;i++) {
1272         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1273       }
1274     }
1275     ierr = PetscFree(vary);CHKERRQ(ierr);
1276   }
1277   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1278   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1279 
1280   /* print some info if requested */
1281   if (pcbddc->dbg_flag) {
1282     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1283     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1284     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1285     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1286     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1287     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1288     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1289     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1290   }
1291 
1292   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1293   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1294   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1295   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1296   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1297   for (i=0; i<n_D; i++) {
1298     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1299   }
1300   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1301   for (i=0, j=0; i<n_R; i++) {
1302     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1303       aux_array1[j++] = i;
1304     }
1305   }
1306   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1307   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1308   for (i=0, j=0; i<n_B; i++) {
1309     if (!PetscBTLookup(bitmask,is_indices[i])) {
1310       aux_array2[j++] = i;
1311     }
1312   }
1313   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1314   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1315   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1316   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1317   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1318 
1319   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1320     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1321     for (i=0, j=0; i<n_R; i++) {
1322       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1323         aux_array1[j++] = i;
1324       }
1325     }
1326     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1327     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1328     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1329   }
1330   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1331   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1338 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1339 {
1340   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1341   PC_IS          *pcis = (PC_IS*)pc->data;
1342   PC             pc_temp;
1343   Mat            A_RR;
1344   MatReuse       reuse;
1345   PetscScalar    m_one = -1.0;
1346   PetscReal      value;
1347   PetscInt       n_D,n_R,ibs,mbs;
1348   PetscBool      use_exact,use_exact_reduced,issbaij;
1349   PetscErrorCode ierr;
1350   /* prefixes stuff */
1351   char           dir_prefix[256],neu_prefix[256],str_level[16];
1352   size_t         len;
1353 
1354   PetscFunctionBegin;
1355 
1356   /* compute prefixes */
1357   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1358   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1359   if (!pcbddc->current_level) {
1360     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1361     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1362     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1363     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1364   } else {
1365     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1366     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1367     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1368     len -= 15; /* remove "pc_bddc_coarse_" */
1369     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1370     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1371     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1372     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1373     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1374     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1375     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1376     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1377   }
1378 
1379   /* DIRICHLET PROBLEM */
1380   if (dirichlet) {
1381     /* Matrix for Dirichlet problem is pcis->A_II */
1382     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1383     if (!pcbddc->ksp_D) { /* create object if not yet build */
1384       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1385       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1386       /* default */
1387       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1388       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1389       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1390       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1391       if (issbaij) {
1392         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1393       } else {
1394         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1395       }
1396       /* Allow user's customization */
1397       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1398       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1399     }
1400     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1401     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1402     if (!n_D) {
1403       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1404       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1405     }
1406     /* Set Up KSP for Dirichlet problem of BDDC */
1407     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1408     /* set ksp_D into pcis data */
1409     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1410     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1411     pcis->ksp_D = pcbddc->ksp_D;
1412   }
1413 
1414   /* NEUMANN PROBLEM */
1415   A_RR = 0;
1416   if (neumann) {
1417     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1418     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1419     if (pcbddc->ksp_R) { /* already created ksp */
1420       PetscInt nn_R;
1421       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1422       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1423       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1424       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1425         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1426         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1427         reuse = MAT_INITIAL_MATRIX;
1428       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1429         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1430           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1431           reuse = MAT_INITIAL_MATRIX;
1432         } else { /* safe to reuse the matrix */
1433           reuse = MAT_REUSE_MATRIX;
1434         }
1435       }
1436       /* last check */
1437       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1438         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1439         reuse = MAT_INITIAL_MATRIX;
1440       }
1441     } else { /* first time, so we need to create the matrix */
1442       reuse = MAT_INITIAL_MATRIX;
1443     }
1444     /* extract A_RR */
1445     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1446     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1447     if (ibs != mbs) {
1448       Mat newmat;
1449       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1450       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1451       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1452     } else {
1453       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1454     }
1455     if (!pcbddc->ksp_R) { /* create object if not present */
1456       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1457       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1458       /* default */
1459       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1460       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1461       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1462       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1463       if (issbaij) {
1464         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1465       } else {
1466         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1467       }
1468       /* Allow user's customization */
1469       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1470       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1471     }
1472     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1473     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1474     if (!n_R) {
1475       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1476       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1477     }
1478     /* Set Up KSP for Neumann problem of BDDC */
1479     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1480   }
1481 
1482   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1483   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1484     if (pcbddc->dbg_flag) {
1485       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1486       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1487       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1488     }
1489     if (dirichlet) { /* Dirichlet */
1490       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1491       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1492       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1493       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1494       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1495       /* need to be adapted? */
1496       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1497       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1498       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1499       /* print info */
1500       if (pcbddc->dbg_flag) {
1501         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1502         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1503       }
1504       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1505         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1506       }
1507     }
1508     if (neumann) { /* Neumann */
1509       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1510       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1511       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1512       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1513       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1514       /* need to be adapted? */
1515       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1516       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1517       /* print info */
1518       if (pcbddc->dbg_flag) {
1519         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1520         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1521       }
1522       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1523         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1524       }
1525     }
1526   }
1527   /* free Neumann problem's matrix */
1528   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1534 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1535 {
1536   PetscErrorCode ierr;
1537   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1538 
1539   PetscFunctionBegin;
1540   if (applytranspose) {
1541     if (pcbddc->local_auxmat1) {
1542       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1543       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1544     }
1545     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1546   } else {
1547     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1548     if (pcbddc->local_auxmat1) {
1549       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1550       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1551     }
1552   }
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1557 #undef __FUNCT__
1558 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1559 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1560 {
1561   PetscErrorCode ierr;
1562   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1563   PC_IS*            pcis = (PC_IS*)  (pc->data);
1564   const PetscScalar zero = 0.0;
1565 
1566   PetscFunctionBegin;
1567   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1568   if (applytranspose) {
1569     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1570     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1571   } else {
1572     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1573     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1574   }
1575   /* start communications from local primal nodes to rhs of coarse solver */
1576   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1577   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1578   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579 
1580   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1581   /* TODO remove null space when doing multilevel */
1582   if (pcbddc->coarse_ksp) {
1583     if (applytranspose) {
1584       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1585     } else {
1586       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1587     }
1588   }
1589 
1590   /* Local solution on R nodes */
1591   if (pcis->n) {
1592     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1593     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1594     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1595     if (pcbddc->switch_static) {
1596       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1597       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1598     }
1599     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1600     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1601     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1602     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1603     if (pcbddc->switch_static) {
1604       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606     }
1607   }
1608 
1609   /* communications from coarse sol to local primal nodes */
1610   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1611   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1612 
1613   /* Sum contributions from two levels */
1614   if (applytranspose) {
1615     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1616     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1617   } else {
1618     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1619     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1620   }
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 #undef __FUNCT__
1625 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1626 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1627 {
1628   PetscErrorCode ierr;
1629   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1630   PetscScalar    *array;
1631   Vec            from,to;
1632 
1633   PetscFunctionBegin;
1634   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1635     from = pcbddc->coarse_vec;
1636     to = pcbddc->vec1_P;
1637     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1638       Vec tvec;
1639 
1640       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1641       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1642       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1643       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1644       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1645       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1646     }
1647   } else { /* from local to global -> put data in coarse right hand side */
1648     from = pcbddc->vec1_P;
1649     to = pcbddc->coarse_vec;
1650   }
1651   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1652   PetscFunctionReturn(0);
1653 }
1654 
1655 #undef __FUNCT__
1656 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1657 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1658 {
1659   PetscErrorCode ierr;
1660   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1661   PetscScalar    *array;
1662   Vec            from,to;
1663 
1664   PetscFunctionBegin;
1665   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1666     from = pcbddc->coarse_vec;
1667     to = pcbddc->vec1_P;
1668   } else { /* from local to global -> put data in coarse right hand side */
1669     from = pcbddc->vec1_P;
1670     to = pcbddc->coarse_vec;
1671   }
1672   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1673   if (smode == SCATTER_FORWARD) {
1674     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1675       Vec tvec;
1676 
1677       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1678       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1679       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1680       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1681     }
1682   } else {
1683     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1684      ierr = VecResetArray(from);CHKERRQ(ierr);
1685     }
1686   }
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 /* uncomment for testing purposes */
1691 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1692 #undef __FUNCT__
1693 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1694 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1695 {
1696   PetscErrorCode    ierr;
1697   PC_IS*            pcis = (PC_IS*)(pc->data);
1698   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1699   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1700   /* one and zero */
1701   PetscScalar       one=1.0,zero=0.0;
1702   /* space to store constraints and their local indices */
1703   PetscScalar       *temp_quadrature_constraint;
1704   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1705   /* iterators */
1706   PetscInt          i,j,k,total_counts,temp_start_ptr;
1707   /* BLAS integers */
1708   PetscBLASInt      lwork,lierr;
1709   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1710   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1711   /* reuse */
1712   PetscInt          olocal_primal_size;
1713   PetscInt          *oprimal_indices_local_idxs;
1714   /* change of basis */
1715   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1716   PetscBool         boolforchange,qr_needed;
1717   PetscBT           touched,change_basis,qr_needed_idx;
1718   /* auxiliary stuff */
1719   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1720   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1721   PetscScalar       *temp_quadrature_constraint_work=NULL;
1722   /* some quantities */
1723   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1724   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1725 
1726   PetscFunctionBegin;
1727   /* Destroy Mat objects computed previously */
1728   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1729   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1730 
1731   /* print some info */
1732   if (pcbddc->dbg_flag) {
1733     IS       vertices;
1734     PetscInt nv,nedges,nfaces;
1735     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1736     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1737     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1738     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1739     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1740     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1741     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_edges);CHKERRQ(ierr);
1742     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_faces);CHKERRQ(ierr);
1743     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1744   }
1745 
1746   if (!pcbddc->adaptive_selection) {
1747     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1748     MatNullSpace nearnullsp;
1749     const Vec    *nearnullvecs;
1750     Vec          *localnearnullsp;
1751     PetscScalar  *array;
1752     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1753     PetscBool    nnsp_has_cnst;
1754     /* LAPACK working arrays for SVD or POD */
1755     PetscBool    skip_lapack;
1756     PetscScalar  *work;
1757     PetscReal    *singular_vals;
1758 #if defined(PETSC_USE_COMPLEX)
1759     PetscReal    *rwork;
1760 #endif
1761 #if defined(PETSC_MISSING_LAPACK_GESVD)
1762     PetscScalar  *temp_basis,*correlation_mat;
1763 #else
1764     PetscBLASInt dummy_int;
1765     PetscScalar  dummy_scalar;
1766 #endif
1767 
1768     /* Get index sets for faces, edges and vertices from graph */
1769     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1770     /* free unneeded index sets */
1771     if (!pcbddc->use_vertices) {
1772       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1773     }
1774     if (!pcbddc->use_edges) {
1775       for (i=0;i<n_ISForEdges;i++) {
1776         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1777       }
1778       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1779       n_ISForEdges = 0;
1780     }
1781     if (!pcbddc->use_faces) {
1782       for (i=0;i<n_ISForFaces;i++) {
1783         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1784       }
1785       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1786       n_ISForFaces = 0;
1787     }
1788     /* HACKS (the following two blocks of code) */
1789     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1790       pcbddc->use_change_of_basis = PETSC_TRUE;
1791       if (!ISForEdges) {
1792         pcbddc->use_change_on_faces = PETSC_TRUE;
1793       }
1794     }
1795     if (pcbddc->NullSpace) {
1796       /* use_change_of_basis should be consistent among processors */
1797       PetscBool tbool[2],gbool[2];
1798       tbool [0] = pcbddc->use_change_of_basis;
1799       tbool [1] = pcbddc->use_change_on_faces;
1800       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1801       pcbddc->use_change_of_basis = gbool[0];
1802       pcbddc->use_change_on_faces = gbool[1];
1803     }
1804 
1805     /* check if near null space is attached to global mat */
1806     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1807     if (nearnullsp) {
1808       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1809       /* remove any stored info */
1810       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1811       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1812       /* store information for BDDC solver reuse */
1813       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1814       pcbddc->onearnullspace = nearnullsp;
1815       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1816       for (i=0;i<nnsp_size;i++) {
1817         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1818       }
1819     } else { /* if near null space is not provided BDDC uses constants by default */
1820       nnsp_size = 0;
1821       nnsp_has_cnst = PETSC_TRUE;
1822     }
1823     /* get max number of constraints on a single cc */
1824     max_constraints = nnsp_size;
1825     if (nnsp_has_cnst) max_constraints++;
1826 
1827     /*
1828          Evaluate maximum storage size needed by the procedure
1829          - temp_indices will contain start index of each constraint stored as follows
1830          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1831          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1832          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1833                                                                                                                                                            */
1834     total_counts = n_ISForFaces+n_ISForEdges;
1835     total_counts *= max_constraints;
1836     n_vertices = 0;
1837     if (ISForVertices) {
1838       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1839     }
1840     total_counts += n_vertices;
1841     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1842     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1843     total_counts = 0;
1844     max_size_of_constraint = 0;
1845     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1846       if (i<n_ISForEdges) {
1847         used_IS = &ISForEdges[i];
1848       } else {
1849         used_IS = &ISForFaces[i-n_ISForEdges];
1850       }
1851       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1852       total_counts += j;
1853       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1854     }
1855     total_counts *= max_constraints;
1856     total_counts += n_vertices;
1857     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1858     /* get local part of global near null space vectors */
1859     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1860     for (k=0;k<nnsp_size;k++) {
1861       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1862       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1863       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1864     }
1865 
1866     /* whether or not to skip lapack calls */
1867     skip_lapack = PETSC_TRUE;
1868     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1869 
1870     /* allocate some auxiliary stuff */
1871     if (!skip_lapack || pcbddc->use_qr_single) {
1872       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1873     }
1874 
1875     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1876     if (!skip_lapack) {
1877       PetscScalar temp_work;
1878 
1879 #if defined(PETSC_MISSING_LAPACK_GESVD)
1880       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1881       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1882       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1883       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1884 #if defined(PETSC_USE_COMPLEX)
1885       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1886 #endif
1887       /* now we evaluate the optimal workspace using query with lwork=-1 */
1888       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1889       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1890       lwork = -1;
1891       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1892 #if !defined(PETSC_USE_COMPLEX)
1893       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1894 #else
1895       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1896 #endif
1897       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1898       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1899 #else /* on missing GESVD */
1900       /* SVD */
1901       PetscInt max_n,min_n;
1902       max_n = max_size_of_constraint;
1903       min_n = max_constraints;
1904       if (max_size_of_constraint < max_constraints) {
1905         min_n = max_size_of_constraint;
1906         max_n = max_constraints;
1907       }
1908       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1909 #if defined(PETSC_USE_COMPLEX)
1910       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1911 #endif
1912       /* now we evaluate the optimal workspace using query with lwork=-1 */
1913       lwork = -1;
1914       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1915       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1916       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1917       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1918 #if !defined(PETSC_USE_COMPLEX)
1919       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1920 #else
1921       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1922 #endif
1923       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1924       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1925 #endif /* on missing GESVD */
1926       /* Allocate optimal workspace */
1927       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1928       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1929     }
1930     /* Now we can loop on constraining sets */
1931     total_counts = 0;
1932     temp_indices[0] = 0;
1933     /* vertices */
1934     if (ISForVertices) {
1935       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1936       if (nnsp_has_cnst) { /* consider all vertices */
1937         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1938         for (i=0;i<n_vertices;i++) {
1939           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1940           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1941           total_counts++;
1942         }
1943       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1944         PetscBool used_vertex;
1945         for (i=0;i<n_vertices;i++) {
1946           used_vertex = PETSC_FALSE;
1947           k = 0;
1948           while (!used_vertex && k<nnsp_size) {
1949             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1950             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1951               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1952               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1953               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1954               total_counts++;
1955               used_vertex = PETSC_TRUE;
1956             }
1957             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1958             k++;
1959           }
1960         }
1961       }
1962       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1963       n_vertices = total_counts;
1964     }
1965 
1966     /* edges and faces */
1967     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1968       if (ncc<n_ISForEdges) {
1969         used_IS = &ISForEdges[ncc];
1970         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1971       } else {
1972         used_IS = &ISForFaces[ncc-n_ISForEdges];
1973         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1974       }
1975       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1976       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1977       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1978       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1979       /* change of basis should not be performed on local periodic nodes */
1980       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1981       if (nnsp_has_cnst) {
1982         PetscScalar quad_value;
1983         temp_constraints++;
1984         if (!pcbddc->use_nnsp_true) {
1985           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1986         } else {
1987           quad_value = 1.0;
1988         }
1989         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1990         for (j=0;j<size_of_constraint;j++) {
1991           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1992         }
1993         /* sort by global ordering if using lapack subroutines (not needed!) */
1994         if (!skip_lapack || pcbddc->use_qr_single) {
1995           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1996           for (j=0;j<size_of_constraint;j++) {
1997             permutation[j]=j;
1998           }
1999           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2000           for (j=0;j<size_of_constraint;j++) {
2001             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2002           }
2003           for (j=0;j<size_of_constraint;j++) {
2004             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2005             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2006           }
2007           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2008           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2009         }
2010         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2011         total_counts++;
2012       }
2013       for (k=0;k<nnsp_size;k++) {
2014         PetscReal real_value;
2015         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2016         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2017         for (j=0;j<size_of_constraint;j++) {
2018           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2019         }
2020         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2021         /* check if array is null on the connected component */
2022         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2023         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2024         if (real_value > 0.0) { /* keep indices and values */
2025           /* sort by global ordering if using lapack subroutines */
2026           if (!skip_lapack || pcbddc->use_qr_single) {
2027             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2028             for (j=0;j<size_of_constraint;j++) {
2029               permutation[j]=j;
2030             }
2031             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2032             for (j=0;j<size_of_constraint;j++) {
2033               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2034               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2035             }
2036             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2037             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2038           }
2039           temp_constraints++;
2040           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2041           total_counts++;
2042         }
2043       }
2044       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2045       valid_constraints = temp_constraints;
2046       if (!pcbddc->use_nnsp_true && temp_constraints) {
2047         if (temp_constraints == 1) { /* just normalize the constraint */
2048           PetscScalar norm;
2049           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2050           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2051           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2052           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2053         } else { /* perform SVD */
2054           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2055 
2056 #if defined(PETSC_MISSING_LAPACK_GESVD)
2057           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2058              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2059              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2060                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2061                 from that computed using LAPACKgesvd
2062              -> This is due to a different computation of eigenvectors in LAPACKheev
2063              -> The quality of the POD-computed basis will be the same */
2064           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2065           /* Store upper triangular part of correlation matrix */
2066           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2067           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2068           for (j=0;j<temp_constraints;j++) {
2069             for (k=0;k<j+1;k++) {
2070               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2071             }
2072           }
2073           /* compute eigenvalues and eigenvectors of correlation matrix */
2074           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2075           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2076 #if !defined(PETSC_USE_COMPLEX)
2077           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2078 #else
2079           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2080 #endif
2081           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2082           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2083           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2084           j = 0;
2085           while (j < temp_constraints && singular_vals[j] < tol) j++;
2086           total_counts = total_counts-j;
2087           valid_constraints = temp_constraints-j;
2088           /* scale and copy POD basis into used quadrature memory */
2089           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2090           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2091           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2092           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2093           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2094           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2095           if (j<temp_constraints) {
2096             PetscInt ii;
2097             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2098             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2099             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2100             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2101             for (k=0;k<temp_constraints-j;k++) {
2102               for (ii=0;ii<size_of_constraint;ii++) {
2103                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2104               }
2105             }
2106           }
2107 #else  /* on missing GESVD */
2108           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2109           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2110           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2111           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2112 #if !defined(PETSC_USE_COMPLEX)
2113           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2114 #else
2115           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2116 #endif
2117           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2118           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2119           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2120           k = temp_constraints;
2121           if (k > size_of_constraint) k = size_of_constraint;
2122           j = 0;
2123           while (j < k && singular_vals[k-j-1] < tol) j++;
2124           valid_constraints = k-j;
2125           total_counts = total_counts-temp_constraints+valid_constraints;
2126 #endif /* on missing GESVD */
2127         }
2128       }
2129       /* setting change_of_basis flag is safe now */
2130       if (boolforchange) {
2131         for (j=0;j<valid_constraints;j++) {
2132           PetscBTSet(change_basis,total_counts-j-1);
2133         }
2134       }
2135     }
2136     /* free workspace */
2137     if (!skip_lapack || pcbddc->use_qr_single) {
2138       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2139     }
2140     if (!skip_lapack) {
2141       ierr = PetscFree(work);CHKERRQ(ierr);
2142 #if defined(PETSC_USE_COMPLEX)
2143       ierr = PetscFree(rwork);CHKERRQ(ierr);
2144 #endif
2145       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2146 #if defined(PETSC_MISSING_LAPACK_GESVD)
2147       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2148       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2149 #endif
2150     }
2151     for (k=0;k<nnsp_size;k++) {
2152       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2153     }
2154     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2155     /* free index sets of faces, edges and vertices */
2156     for (i=0;i<n_ISForFaces;i++) {
2157       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2158     }
2159     if (n_ISForFaces) {
2160       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2161     }
2162     for (i=0;i<n_ISForEdges;i++) {
2163       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2164     }
2165     if (n_ISForEdges) {
2166       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2167     }
2168     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2169   } else {
2170     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2171     PetscInt        cum = 0;
2172 
2173     total_counts = 0;
2174     n_vertices = 0;
2175     if (sub_schurs->is_Ej_com) {
2176       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2177     }
2178     max_constraints = 0;
2179     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2180       total_counts += pcbddc->adaptive_constraints_n[i];
2181       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2182     }
2183     temp_indices = pcbddc->adaptive_constraints_ptrs;
2184     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2185     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2186 
2187 #if 0
2188     printf("Found %d totals\n",total_counts);
2189     for (i=0;i<total_counts;i++) {
2190       printf("const %d, start %d",i,temp_indices[i]);
2191       printf(" end %d:\n",temp_indices[i+1]);
2192       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2193         printf("  idxs %d",temp_indices_to_constraint[j]);
2194         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2195       }
2196     }
2197     for (i=0;i<n_vertices;i++) {
2198       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2199     }
2200     for (i=0;i<sub_schurs->n_subs;i++) {
2201       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2202     }
2203 #endif
2204 
2205     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2206     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2207     /* Change of basis */
2208     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2209     if (pcbddc->use_change_of_basis) {
2210       cum = n_vertices;
2211       for (i=0;i<sub_schurs->n_subs;i++) {
2212         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2213           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2214             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2215           }
2216         }
2217         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2218       }
2219     }
2220   }
2221 
2222   /* map temp_indices_to_constraint in boundary numbering */
2223   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2224   if (i != temp_indices[total_counts]) {
2225     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2226   }
2227 
2228   /* set quantities in pcbddc data structure and store previous primal size */
2229   /* n_vertices defines the number of subdomain corners in the primal space */
2230   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2231   olocal_primal_size = pcbddc->local_primal_size;
2232   pcbddc->local_primal_size = total_counts;
2233   pcbddc->n_vertices = n_vertices;
2234   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2235 
2236   /* Create constraint matrix */
2237   /* The constraint matrix is used to compute the l2g map of primal dofs */
2238   /* so we need to set it up properly either with or without change of basis */
2239   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2240   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2241   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2242   /* array to compute a local numbering of constraints : vertices first then constraints */
2243   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2244   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2245   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2246   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2247   /* auxiliary stuff for basis change */
2248   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2249   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2250 
2251   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2252   total_primal_vertices=0;
2253   for (i=0;i<pcbddc->local_primal_size;i++) {
2254     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2255     if (size_of_constraint == 1) {
2256       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2257       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2258       aux_primal_minloc[total_primal_vertices]=0;
2259       total_primal_vertices++;
2260     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2261       PetscInt min_loc,min_index;
2262       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2263       /* find first untouched local node */
2264       k = 0;
2265       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2266       min_index = global_indices[k];
2267       min_loc = k;
2268       /* search the minimum among global nodes already untouched on the cc */
2269       for (k=1;k<size_of_constraint;k++) {
2270         /* there can be more than one constraint on a single connected component */
2271         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2272           min_index = global_indices[k];
2273           min_loc = k;
2274         }
2275       }
2276       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2277       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2278       aux_primal_minloc[total_primal_vertices]=min_loc;
2279       total_primal_vertices++;
2280     }
2281   }
2282   /* determine if a QR strategy is needed for change of basis */
2283   qr_needed = PETSC_FALSE;
2284   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2285   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2286     if (PetscBTLookup(change_basis,i)) {
2287       if (!pcbddc->use_qr_single) {
2288         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2289         j = 0;
2290         for (k=0;k<size_of_constraint;k++) {
2291           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2292             j++;
2293           }
2294         }
2295         /* found more than one primal dof on the cc */
2296         if (j > 1) {
2297           PetscBTSet(qr_needed_idx,i);
2298           qr_needed = PETSC_TRUE;
2299         }
2300       } else {
2301         PetscBTSet(qr_needed_idx,i);
2302         qr_needed = PETSC_TRUE;
2303       }
2304     }
2305   }
2306   /* free workspace */
2307   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2308 
2309   /* permute indices in order to have a sorted set of vertices */
2310   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2311 
2312   /* nonzero structure of constraint matrix */
2313   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2314   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2315   j=total_primal_vertices;
2316   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2317     if (!PetscBTLookup(change_basis,i)) {
2318       nnz[j]=temp_indices[i+1]-temp_indices[i];
2319       j++;
2320     }
2321   }
2322   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2323   ierr = PetscFree(nnz);CHKERRQ(ierr);
2324   /* set values in constraint matrix */
2325   for (i=0;i<total_primal_vertices;i++) {
2326     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2327   }
2328   total_counts = total_primal_vertices;
2329   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2330     if (!PetscBTLookup(change_basis,i)) {
2331       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2332       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2333       total_counts++;
2334     }
2335   }
2336   /* assembling */
2337   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2338   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2339   /*
2340   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2341   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2342   */
2343   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2344   if (pcbddc->use_change_of_basis) {
2345     /* dual and primal dofs on a single cc */
2346     PetscInt     dual_dofs,primal_dofs;
2347     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2348     PetscInt     primal_counter;
2349     /* working stuff for GEQRF */
2350     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2351     PetscBLASInt lqr_work;
2352     /* working stuff for UNGQR */
2353     PetscScalar  *gqr_work,lgqr_work_t;
2354     PetscBLASInt lgqr_work;
2355     /* working stuff for TRTRS */
2356     PetscScalar  *trs_rhs;
2357     PetscBLASInt Blas_NRHS;
2358     /* pointers for values insertion into change of basis matrix */
2359     PetscInt     *start_rows,*start_cols;
2360     PetscScalar  *start_vals;
2361     /* working stuff for values insertion */
2362     PetscBT      is_primal;
2363     /* matrix sizes */
2364     PetscInt     global_size,local_size;
2365     /* temporary change of basis */
2366     Mat          localChangeOfBasisMatrix;
2367     /* extra space for debugging */
2368     PetscScalar  *dbg_work;
2369 
2370     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2371     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2372     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2373     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2374     /* nonzeros for local mat */
2375     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2376     for (i=0;i<pcis->n;i++) nnz[i]=1;
2377     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2378       if (PetscBTLookup(change_basis,i)) {
2379         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2380         if (PetscBTLookup(qr_needed_idx,i)) {
2381           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2382         } else {
2383           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2384           /* get local primal index on the cc */
2385           j = 0;
2386           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2387           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2388         }
2389       }
2390     }
2391     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2392     ierr = PetscFree(nnz);CHKERRQ(ierr);
2393     /* Set initial identity in the matrix */
2394     for (i=0;i<pcis->n;i++) {
2395       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2396     }
2397 
2398     if (pcbddc->dbg_flag) {
2399       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2400       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2401     }
2402 
2403 
2404     /* Now we loop on the constraints which need a change of basis */
2405     /*
2406        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2407        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2408 
2409        Basic blocks of change of basis matrix T computed by
2410 
2411           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2412 
2413             | 1        0   ...        0         s_1/S |
2414             | 0        1   ...        0         s_2/S |
2415             |              ...                        |
2416             | 0        ...            1     s_{n-1}/S |
2417             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2418 
2419             with S = \sum_{i=1}^n s_i^2
2420             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2421                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2422 
2423           - QR decomposition of constraints otherwise
2424     */
2425     if (qr_needed) {
2426       /* space to store Q */
2427       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2428       /* first we issue queries for optimal work */
2429       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2430       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2431       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2432       lqr_work = -1;
2433       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2434       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2435       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2436       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2437       lgqr_work = -1;
2438       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2439       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2440       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2441       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2442       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2443       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2444       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2445       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2446       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2447       /* array to store scaling factors for reflectors */
2448       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2449       /* array to store rhs and solution of triangular solver */
2450       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2451       /* allocating workspace for check */
2452       if (pcbddc->dbg_flag) {
2453         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2454       }
2455     }
2456     /* array to store whether a node is primal or not */
2457     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2458     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2459     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2460     if (i != total_primal_vertices) {
2461       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2462     }
2463     for (i=0;i<total_primal_vertices;i++) {
2464       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2465     }
2466     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2467 
2468     /* loop on constraints and see whether or not they need a change of basis and compute it */
2469     /* -> using implicit ordering contained in temp_indices data */
2470     total_counts = pcbddc->n_vertices;
2471     primal_counter = total_counts;
2472     while (total_counts<pcbddc->local_primal_size) {
2473       primal_dofs = 1;
2474       if (PetscBTLookup(change_basis,total_counts)) {
2475         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2476         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2477           primal_dofs++;
2478         }
2479         /* get constraint info */
2480         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2481         dual_dofs = size_of_constraint-primal_dofs;
2482 
2483         if (pcbddc->dbg_flag) {
2484           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2485         }
2486 
2487         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2488 
2489           /* copy quadrature constraints for change of basis check */
2490           if (pcbddc->dbg_flag) {
2491             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2492           }
2493           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2494           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2495 
2496           /* compute QR decomposition of constraints */
2497           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2498           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2499           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2500           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2501           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2502           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2503           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2504 
2505           /* explictly compute R^-T */
2506           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2507           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2508           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2509           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2510           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2511           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2512           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2513           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2514           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2515           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2516 
2517           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2518           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2519           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2520           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2521           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2522           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2523           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2524           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2525           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2526 
2527           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2528              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2529              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2530           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2531           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2532           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2533           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2534           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2535           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2536           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2537           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2538           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2539           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2540 
2541           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2542           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2543           /* insert cols for primal dofs */
2544           for (j=0;j<primal_dofs;j++) {
2545             start_vals = &qr_basis[j*size_of_constraint];
2546             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2547             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2548           }
2549           /* insert cols for dual dofs */
2550           for (j=0,k=0;j<dual_dofs;k++) {
2551             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2552               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2553               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2554               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2555               j++;
2556             }
2557           }
2558 
2559           /* check change of basis */
2560           if (pcbddc->dbg_flag) {
2561             PetscInt   ii,jj;
2562             PetscBool valid_qr=PETSC_TRUE;
2563             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2564             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2565             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2566             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2567             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2568             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2569             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2570             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2571             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2572             for (jj=0;jj<size_of_constraint;jj++) {
2573               for (ii=0;ii<primal_dofs;ii++) {
2574                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2575                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2576               }
2577             }
2578             if (!valid_qr) {
2579               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2580               for (jj=0;jj<size_of_constraint;jj++) {
2581                 for (ii=0;ii<primal_dofs;ii++) {
2582                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2583                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2584                   }
2585                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2586                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2587                   }
2588                 }
2589               }
2590             } else {
2591               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2592             }
2593           }
2594         } else { /* simple transformation block */
2595           PetscInt    row,col;
2596           PetscScalar val,norm;
2597 
2598           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2599           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2600           for (j=0;j<size_of_constraint;j++) {
2601             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2602             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2603             if (!PetscBTLookup(is_primal,row_B)) {
2604               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2605               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2606               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2607             } else {
2608               for (k=0;k<size_of_constraint;k++) {
2609                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2610                 if (row != col) {
2611                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2612                 } else {
2613                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2614                 }
2615                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2616               }
2617             }
2618           }
2619           if (pcbddc->dbg_flag) {
2620             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2621           }
2622         }
2623         /* increment primal counter */
2624         primal_counter += primal_dofs;
2625       } else {
2626         if (pcbddc->dbg_flag) {
2627           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2628         }
2629       }
2630       /* increment constraint counter total_counts */
2631       total_counts += primal_dofs;
2632     }
2633 
2634     /* free workspace */
2635     if (qr_needed) {
2636       if (pcbddc->dbg_flag) {
2637         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2638       }
2639       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2640       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2641       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2642       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2643       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2644     }
2645     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2646     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2647     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2648 
2649     /* assembling of global change of variable */
2650     {
2651       Mat      tmat;
2652       PetscInt bs;
2653 
2654       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2655       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2656       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2657       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2658       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2659       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2660       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2661       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2662       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2663       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2664       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2665       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2666       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2667       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2668       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2669       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2670       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2671       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2672     }
2673     /* check */
2674     if (pcbddc->dbg_flag) {
2675       PetscReal error;
2676       Vec       x,x_change;
2677 
2678       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2679       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2680       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2681       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2682       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2683       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2684       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2685       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2686       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2687       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2688       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2689       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2690       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2691       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2692       ierr = VecDestroy(&x);CHKERRQ(ierr);
2693       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2694     }
2695 
2696     /* adapt sub_schurs computed (if any) */
2697     if (pcbddc->use_deluxe_scaling) {
2698       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2699       if (sub_schurs->n_subs_par_g) {
2700         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2701       }
2702       if (sub_schurs->S_Ej_all) {
2703         Mat S_1,S_2,tmat;
2704         IS is_all_N;
2705 
2706         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2707         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2708         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2709         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2710         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2711         sub_schurs->S_Ej_all = S_1;
2712         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2713         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2714         sub_schurs->sum_S_Ej_all = S_2;
2715         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2716       }
2717     }
2718     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2719   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2720     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2721     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2722   }
2723 
2724   /* set up change of basis context */
2725   if (pcbddc->ChangeOfBasisMatrix) {
2726     PCBDDCChange_ctx change_ctx;
2727 
2728     if (!pcbddc->new_global_mat) {
2729       PetscInt global_size,local_size;
2730 
2731       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2732       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2733       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2734       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2735       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2736       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2737       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2738       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2739       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2740     } else {
2741       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2742       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2743       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2744     }
2745     if (!pcbddc->user_ChangeOfBasisMatrix) {
2746       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2747       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2748     } else {
2749       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2750       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2751     }
2752     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2753     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2754     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2755     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2756   }
2757 
2758   /* get indices in local ordering for vertices and constraints */
2759   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2760     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2761     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2762   }
2763   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2764   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2765   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2766   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2767   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2768   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2769   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2770   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2771   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2772   /* set quantities in PCBDDC data struct */
2773   pcbddc->n_actual_vertices = i;
2774   /* check if a new primal space has been introduced */
2775   pcbddc->new_primal_space_local = PETSC_TRUE;
2776   if (olocal_primal_size == pcbddc->local_primal_size) {
2777     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2778     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2779     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2780   }
2781   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2782   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2783 
2784   /* flush dbg viewer */
2785   if (pcbddc->dbg_flag) {
2786     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2787   }
2788 
2789   /* free workspace */
2790   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2791   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2792   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2793   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2794   if (!pcbddc->adaptive_selection) {
2795     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2796     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2797   } else {
2798     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2799                       pcbddc->adaptive_constraints_ptrs,
2800                       pcbddc->adaptive_constraints_idxs,
2801                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2802     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2803   }
2804   PetscFunctionReturn(0);
2805 }
2806 
2807 #undef __FUNCT__
2808 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2809 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2810 {
2811   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2812   PC_IS       *pcis = (PC_IS*)pc->data;
2813   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2814   PetscInt    ierr,i,vertex_size;
2815   PetscViewer viewer=pcbddc->dbg_viewer;
2816 
2817   PetscFunctionBegin;
2818   /* Reset previously computed graph */
2819   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2820   /* Init local Graph struct */
2821   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2822 
2823   /* Check validity of the csr graph passed in by the user */
2824   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2825     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2826   }
2827 
2828   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2829   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2830     PetscInt *xadj,*adjncy;
2831     PetscInt nvtxs;
2832 
2833     if (pcbddc->use_local_adj) {
2834       PetscBool flg_row=PETSC_FALSE;
2835 
2836       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2837       if (flg_row) {
2838         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2839         pcbddc->computed_rowadj = PETSC_TRUE;
2840       }
2841       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2842     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2843       IS                     is_dummy;
2844       ISLocalToGlobalMapping l2gmap_dummy;
2845       PetscInt               j,sum;
2846       PetscInt               *cxadj,*cadjncy;
2847       const PetscInt         *idxs;
2848       PCBDDCGraph            graph;
2849       PetscBT                is_on_boundary;
2850 
2851       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2852       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2853       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2854       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2855       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2856       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2857       graph->xadj = xadj;
2858       graph->adjncy = adjncy;
2859       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2860       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2861 
2862       if (pcbddc->dbg_flag) {
2863         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2864         for (i=0;i<graph->ncc;i++) {
2865           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2866         }
2867         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2868       }
2869 
2870       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2871       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2872       for (i=0;i<pcis->n_B;i++) {
2873         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2874       }
2875       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2876 
2877       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2878       sum = 0;
2879       for (i=0;i<graph->ncc;i++) {
2880         PetscInt sizecc = 0;
2881         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2882           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2883             sizecc++;
2884           }
2885         }
2886         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2887           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2888             cxadj[graph->queue[j]] = sizecc;
2889           }
2890         }
2891         sum += sizecc*sizecc;
2892       }
2893       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2894       sum = 0;
2895       for (i=0;i<nvtxs;i++) {
2896         PetscInt temp = cxadj[i];
2897         cxadj[i] = sum;
2898         sum += temp;
2899       }
2900       cxadj[nvtxs] = sum;
2901       for (i=0;i<graph->ncc;i++) {
2902         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2903           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2904             PetscInt k,sizecc = 0;
2905             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2906               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2907                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2908                 sizecc++;
2909               }
2910             }
2911           }
2912         }
2913       }
2914       if (nvtxs) {
2915         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2916       } else {
2917         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2918         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2919       }
2920       graph->xadj = 0;
2921       graph->adjncy = 0;
2922       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2923       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2924     }
2925   }
2926 
2927   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2928   vertex_size = 1;
2929   if (pcbddc->user_provided_isfordofs) {
2930     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2931       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2932       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2933         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2934         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2935       }
2936       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2937       pcbddc->n_ISForDofs = 0;
2938       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2939     }
2940     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2941     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2942   } else {
2943     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2944       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2945       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2946       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2947         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2948       }
2949     }
2950   }
2951 
2952   /* Setup of Graph */
2953   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2954     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2955   }
2956   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2957     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2958   }
2959   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2960 
2961   /* Graph's connected components analysis */
2962   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2963 
2964   /* print some info to stdout */
2965   if (pcbddc->dbg_flag) {
2966     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2967   }
2968 
2969   /* mark topography has done */
2970   pcbddc->recompute_topography = PETSC_FALSE;
2971   PetscFunctionReturn(0);
2972 }
2973 
2974 #undef __FUNCT__
2975 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2976 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2977 {
2978   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2979   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2980   PetscErrorCode ierr;
2981 
2982   PetscFunctionBegin;
2983   n = 0;
2984   vertices = 0;
2985   if (pcbddc->ConstraintMatrix) {
2986     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2987     for (i=0;i<local_primal_size;i++) {
2988       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2989       if (size_of_constraint == 1) n++;
2990       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2991     }
2992     if (vertices_idx) {
2993       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2994       n = 0;
2995       for (i=0;i<local_primal_size;i++) {
2996         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2997         if (size_of_constraint == 1) {
2998           vertices[n++]=row_cmat_indices[0];
2999         }
3000         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3001       }
3002     }
3003   }
3004   *n_vertices = n;
3005   if (vertices_idx) *vertices_idx = vertices;
3006   PetscFunctionReturn(0);
3007 }
3008 
3009 #undef __FUNCT__
3010 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3011 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3012 {
3013   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3014   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3015   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3016   PetscBT        touched;
3017   PetscErrorCode ierr;
3018 
3019     /* This function assumes that the number of local constraints per connected component
3020        is not greater than the number of nodes defined for the connected component
3021        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3022   PetscFunctionBegin;
3023   n = 0;
3024   constraints_index = 0;
3025   if (pcbddc->ConstraintMatrix) {
3026     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3027     max_size_of_constraint = 0;
3028     for (i=0;i<local_primal_size;i++) {
3029       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3030       if (size_of_constraint > 1) {
3031         n++;
3032       }
3033       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3034       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3035     }
3036     if (constraints_idx) {
3037       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3038       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3039       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3040       n = 0;
3041       for (i=0;i<local_primal_size;i++) {
3042         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3043         if (size_of_constraint > 1) {
3044           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3045           /* find first untouched local node */
3046           j = 0;
3047           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3048           min_index = row_cmat_global_indices[j];
3049           min_loc = j;
3050           /* search the minimum among nodes not yet touched on the connected component
3051              since there can be more than one constraint on a single cc */
3052           for (j=1;j<size_of_constraint;j++) {
3053             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3054               min_index = row_cmat_global_indices[j];
3055               min_loc = j;
3056             }
3057           }
3058           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3059           constraints_index[n++] = row_cmat_indices[min_loc];
3060         }
3061         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3062       }
3063       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3064       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3065     }
3066   }
3067   *n_constraints = n;
3068   if (constraints_idx) *constraints_idx = constraints_index;
3069   PetscFunctionReturn(0);
3070 }
3071 
3072 #undef __FUNCT__
3073 #define __FUNCT__ "PCBDDCSubsetNumbering"
3074 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3075 {
3076   Vec            local_vec,global_vec;
3077   IS             seqis,paris;
3078   VecScatter     scatter_ctx;
3079   PetscScalar    *array;
3080   PetscInt       *temp_global_dofs;
3081   PetscScalar    globalsum;
3082   PetscInt       i,j,s;
3083   PetscInt       nlocals,first_index,old_index,max_local;
3084   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3085   PetscMPIInt    *dof_sizes,*dof_displs;
3086   PetscBool      first_found;
3087   PetscErrorCode ierr;
3088 
3089   PetscFunctionBegin;
3090   /* mpi buffers */
3091   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3092   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3093   j = ( !rank_prec_comm ? size_prec_comm : 0);
3094   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3095   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3096   /* get maximum size of subset */
3097   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3098   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3099   max_local = 0;
3100   for (i=0;i<n_local_dofs;i++) {
3101     if (max_local < temp_global_dofs[i] ) {
3102       max_local = temp_global_dofs[i];
3103     }
3104   }
3105   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3106   max_global++;
3107   max_local = 0;
3108   for (i=0;i<n_local_dofs;i++) {
3109     if (max_local < local_dofs[i] ) {
3110       max_local = local_dofs[i];
3111     }
3112   }
3113   max_local++;
3114   /* allocate workspace */
3115   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3116   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3117   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3118   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3119   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3120   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3121   /* create scatter */
3122   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3123   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3124   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3125   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3126   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3127   /* init array */
3128   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3129   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3130   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3131   if (local_dofs_mult) {
3132     for (i=0;i<n_local_dofs;i++) {
3133       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3134     }
3135   } else {
3136     for (i=0;i<n_local_dofs;i++) {
3137       array[local_dofs[i]]=1.0;
3138     }
3139   }
3140   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3141   /* scatter into global vec and get total number of global dofs */
3142   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3143   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3144   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3145   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3146   /* Fill global_vec with cumulative function for global numbering */
3147   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3148   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3149   nlocals = 0;
3150   first_index = -1;
3151   first_found = PETSC_FALSE;
3152   for (i=0;i<s;i++) {
3153     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3154       first_found = PETSC_TRUE;
3155       first_index = i;
3156     }
3157     nlocals += (PetscInt)PetscRealPart(array[i]);
3158   }
3159   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3160   if (!rank_prec_comm) {
3161     dof_displs[0]=0;
3162     for (i=1;i<size_prec_comm;i++) {
3163       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3164     }
3165   }
3166   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3167   if (first_found) {
3168     array[first_index] += (PetscScalar)nlocals;
3169     old_index = first_index;
3170     for (i=first_index+1;i<s;i++) {
3171       if (PetscRealPart(array[i]) > 0.1) {
3172         array[i] += array[old_index];
3173         old_index = i;
3174       }
3175     }
3176   }
3177   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3178   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3179   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3180   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3181   /* get global ordering of local dofs */
3182   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3183   if (local_dofs_mult) {
3184     for (i=0;i<n_local_dofs;i++) {
3185       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3186     }
3187   } else {
3188     for (i=0;i<n_local_dofs;i++) {
3189       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3190     }
3191   }
3192   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3193   /* free workspace */
3194   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3195   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3196   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3197   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3198   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3199   /* return pointer to global ordering of local dofs */
3200   *global_numbering_subset = temp_global_dofs;
3201   PetscFunctionReturn(0);
3202 }
3203 
3204 #undef __FUNCT__
3205 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3206 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3207 {
3208   PetscInt       i,j;
3209   PetscScalar    *alphas;
3210   PetscErrorCode ierr;
3211 
3212   PetscFunctionBegin;
3213   /* this implements stabilized Gram-Schmidt */
3214   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3215   for (i=0;i<n;i++) {
3216     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3217     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3218     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3219   }
3220   ierr = PetscFree(alphas);CHKERRQ(ierr);
3221   PetscFunctionReturn(0);
3222 }
3223 
3224 #undef __FUNCT__
3225 #define __FUNCT__ "MatISGetSubassemblingPattern"
3226 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3227 {
3228   Mat             subdomain_adj;
3229   IS              new_ranks,ranks_send_to;
3230   MatPartitioning partitioner;
3231   Mat_IS          *matis;
3232   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3233   PetscInt        prank;
3234   PetscMPIInt     size,rank,color;
3235   PetscInt        *xadj,*adjncy,*oldranks;
3236   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3237   PetscInt        i,local_size,threshold=0;
3238   PetscErrorCode  ierr;
3239   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3240   PetscSubcomm    subcomm;
3241 
3242   PetscFunctionBegin;
3243   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3244   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3245   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3246 
3247   /* Get info on mapping */
3248   matis = (Mat_IS*)(mat->data);
3249   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3250   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3251 
3252   /* build local CSR graph of subdomains' connectivity */
3253   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3254   xadj[0] = 0;
3255   xadj[1] = PetscMax(n_neighs-1,0);
3256   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3257   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3258 
3259   if (threshold) {
3260     PetscInt xadj_count = 0;
3261     for (i=1;i<n_neighs;i++) {
3262       if (n_shared[i] > threshold) {
3263         adjncy[xadj_count] = neighs[i];
3264         adjncy_wgt[xadj_count] = n_shared[i];
3265         xadj_count++;
3266       }
3267     }
3268     xadj[1] = xadj_count;
3269   } else {
3270     if (xadj[1]) {
3271       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3272       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3273     }
3274   }
3275   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3276   if (use_square) {
3277     for (i=0;i<xadj[1];i++) {
3278       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3279     }
3280   }
3281   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3282 
3283   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3284 
3285   /*
3286     Restrict work on active processes only.
3287   */
3288   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3289   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3290   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3291   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3292   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3293   if (color) {
3294     ierr = PetscFree(xadj);CHKERRQ(ierr);
3295     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3296     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3297   } else {
3298     PetscInt coarsening_ratio;
3299     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3300     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3301     prank = rank;
3302     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3303     /*
3304     for (i=0;i<size;i++) {
3305       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3306     }
3307     */
3308     for (i=0;i<xadj[1];i++) {
3309       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3310     }
3311     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3312     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3313     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3314 
3315     /* Partition */
3316     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3317     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3318     if (use_vwgt) {
3319       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3320       v_wgt[0] = local_size;
3321       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3322     }
3323     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3324     coarsening_ratio = size/n_subdomains;
3325     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3326     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3327     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3328     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3329 
3330     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3331     if (contiguous) {
3332       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3333     } else {
3334       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3335     }
3336     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3337     /* clean up */
3338     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3339     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3340     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3341     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3342   }
3343   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3344 
3345   /* assemble parallel IS for sends */
3346   i = 1;
3347   if (color) i=0;
3348   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3349 
3350   /* get back IS */
3351   *is_sends = ranks_send_to;
3352   PetscFunctionReturn(0);
3353 }
3354 
3355 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3356 
3357 #undef __FUNCT__
3358 #define __FUNCT__ "MatISSubassemble"
3359 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3360 {
3361   Mat                    local_mat;
3362   Mat_IS                 *matis;
3363   IS                     is_sends_internal;
3364   PetscInt               rows,cols,new_local_rows;
3365   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3366   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3367   ISLocalToGlobalMapping l2gmap;
3368   PetscInt*              l2gmap_indices;
3369   const PetscInt*        is_indices;
3370   MatType                new_local_type;
3371   /* buffers */
3372   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3373   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3374   PetscInt               *recv_buffer_idxs_local;
3375   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3376   /* MPI */
3377   MPI_Comm               comm,comm_n;
3378   PetscSubcomm           subcomm;
3379   PetscMPIInt            n_sends,n_recvs,commsize;
3380   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3381   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3382   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3383   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3384   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3385   PetscErrorCode         ierr;
3386 
3387   PetscFunctionBegin;
3388   /* TODO: add missing checks */
3389   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3390   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3391   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3392   PetscValidLogicalCollectiveInt(mat,nis,7);
3393   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3394   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3395   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3396   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3397   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3398   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3399   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3400   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3401     PetscInt mrows,mcols,mnrows,mncols;
3402     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3403     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3404     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3405     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3406     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3407     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3408   }
3409   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3410   PetscValidLogicalCollectiveInt(mat,bs,0);
3411   /* prepare IS for sending if not provided */
3412   if (!is_sends) {
3413     PetscBool pcontig = PETSC_TRUE;
3414     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3415     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3416   } else {
3417     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3418     is_sends_internal = is_sends;
3419   }
3420 
3421   /* get pointer of MATIS data */
3422   matis = (Mat_IS*)mat->data;
3423 
3424   /* get comm */
3425   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3426 
3427   /* compute number of sends */
3428   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3429   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3430 
3431   /* compute number of receives */
3432   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3433   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3434   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3435   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3436   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3437   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3438   ierr = PetscFree(iflags);CHKERRQ(ierr);
3439 
3440   /* restrict comm if requested */
3441   subcomm = 0;
3442   destroy_mat = PETSC_FALSE;
3443   if (restrict_comm) {
3444     PetscMPIInt color,subcommsize;
3445 
3446     color = 0;
3447     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3448     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3449     subcommsize = commsize - subcommsize;
3450     /* check if reuse has been requested */
3451     if (reuse == MAT_REUSE_MATRIX) {
3452       if (*mat_n) {
3453         PetscMPIInt subcommsize2;
3454         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3455         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3456         comm_n = PetscObjectComm((PetscObject)*mat_n);
3457       } else {
3458         comm_n = PETSC_COMM_SELF;
3459       }
3460     } else { /* MAT_INITIAL_MATRIX */
3461       PetscMPIInt rank;
3462 
3463       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3464       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3465       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3466       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3467       comm_n = subcomm->comm;
3468     }
3469     /* flag to destroy *mat_n if not significative */
3470     if (color) destroy_mat = PETSC_TRUE;
3471   } else {
3472     comm_n = comm;
3473   }
3474 
3475   /* prepare send/receive buffers */
3476   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3477   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3478   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3479   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3480   if (nis) {
3481     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3482   }
3483 
3484   /* Get data from local matrices */
3485   if (!isdense) {
3486     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3487     /* TODO: See below some guidelines on how to prepare the local buffers */
3488     /*
3489        send_buffer_vals should contain the raw values of the local matrix
3490        send_buffer_idxs should contain:
3491        - MatType_PRIVATE type
3492        - PetscInt        size_of_l2gmap
3493        - PetscInt        global_row_indices[size_of_l2gmap]
3494        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3495     */
3496   } else {
3497     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3498     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3499     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3500     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3501     send_buffer_idxs[1] = i;
3502     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3503     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3504     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3505     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3506     for (i=0;i<n_sends;i++) {
3507       ilengths_vals[is_indices[i]] = len*len;
3508       ilengths_idxs[is_indices[i]] = len+2;
3509     }
3510   }
3511   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3512   /* additional is (if any) */
3513   if (nis) {
3514     PetscMPIInt psum;
3515     PetscInt j;
3516     for (j=0,psum=0;j<nis;j++) {
3517       PetscInt plen;
3518       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3519       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3520       psum += len+1; /* indices + lenght */
3521     }
3522     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3523     for (j=0,psum=0;j<nis;j++) {
3524       PetscInt plen;
3525       const PetscInt *is_array_idxs;
3526       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3527       send_buffer_idxs_is[psum] = plen;
3528       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3529       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3530       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3531       psum += plen+1; /* indices + lenght */
3532     }
3533     for (i=0;i<n_sends;i++) {
3534       ilengths_idxs_is[is_indices[i]] = psum;
3535     }
3536     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3537   }
3538 
3539   buf_size_idxs = 0;
3540   buf_size_vals = 0;
3541   buf_size_idxs_is = 0;
3542   for (i=0;i<n_recvs;i++) {
3543     buf_size_idxs += (PetscInt)olengths_idxs[i];
3544     buf_size_vals += (PetscInt)olengths_vals[i];
3545     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3546   }
3547   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3548   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3549   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3550 
3551   /* get new tags for clean communications */
3552   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3553   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3554   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3555 
3556   /* allocate for requests */
3557   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3558   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3559   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3560   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3561   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3562   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3563 
3564   /* communications */
3565   ptr_idxs = recv_buffer_idxs;
3566   ptr_vals = recv_buffer_vals;
3567   ptr_idxs_is = recv_buffer_idxs_is;
3568   for (i=0;i<n_recvs;i++) {
3569     source_dest = onodes[i];
3570     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3571     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3572     ptr_idxs += olengths_idxs[i];
3573     ptr_vals += olengths_vals[i];
3574     if (nis) {
3575       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3576       ptr_idxs_is += olengths_idxs_is[i];
3577     }
3578   }
3579   for (i=0;i<n_sends;i++) {
3580     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3581     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3582     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3583     if (nis) {
3584       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3585     }
3586   }
3587   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3588   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3589 
3590   /* assemble new l2g map */
3591   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3592   ptr_idxs = recv_buffer_idxs;
3593   new_local_rows = 0;
3594   for (i=0;i<n_recvs;i++) {
3595     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3596     ptr_idxs += olengths_idxs[i];
3597   }
3598   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3599   ptr_idxs = recv_buffer_idxs;
3600   new_local_rows = 0;
3601   for (i=0;i<n_recvs;i++) {
3602     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3603     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3604     ptr_idxs += olengths_idxs[i];
3605   }
3606   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3607   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3608   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3609 
3610   /* infer new local matrix type from received local matrices type */
3611   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3612   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3613   if (n_recvs) {
3614     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3615     ptr_idxs = recv_buffer_idxs;
3616     for (i=0;i<n_recvs;i++) {
3617       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3618         new_local_type_private = MATAIJ_PRIVATE;
3619         break;
3620       }
3621       ptr_idxs += olengths_idxs[i];
3622     }
3623     switch (new_local_type_private) {
3624       case MATDENSE_PRIVATE:
3625         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3626           new_local_type = MATSEQAIJ;
3627           bs = 1;
3628         } else { /* if I receive only 1 dense matrix */
3629           new_local_type = MATSEQDENSE;
3630           bs = 1;
3631         }
3632         break;
3633       case MATAIJ_PRIVATE:
3634         new_local_type = MATSEQAIJ;
3635         bs = 1;
3636         break;
3637       case MATBAIJ_PRIVATE:
3638         new_local_type = MATSEQBAIJ;
3639         break;
3640       case MATSBAIJ_PRIVATE:
3641         new_local_type = MATSEQSBAIJ;
3642         break;
3643       default:
3644         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3645         break;
3646     }
3647   } else { /* by default, new_local_type is seqdense */
3648     new_local_type = MATSEQDENSE;
3649     bs = 1;
3650   }
3651 
3652   /* create MATIS object if needed */
3653   if (reuse == MAT_INITIAL_MATRIX) {
3654     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3655     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3656   } else {
3657     /* it also destroys the local matrices */
3658     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3659   }
3660   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3661   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3662 
3663   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3664 
3665   /* Global to local map of received indices */
3666   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3667   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3668   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3669 
3670   /* restore attributes -> type of incoming data and its size */
3671   buf_size_idxs = 0;
3672   for (i=0;i<n_recvs;i++) {
3673     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3674     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3675     buf_size_idxs += (PetscInt)olengths_idxs[i];
3676   }
3677   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3678 
3679   /* set preallocation */
3680   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3681   if (!newisdense) {
3682     PetscInt *new_local_nnz=0;
3683 
3684     ptr_vals = recv_buffer_vals;
3685     ptr_idxs = recv_buffer_idxs_local;
3686     if (n_recvs) {
3687       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3688     }
3689     for (i=0;i<n_recvs;i++) {
3690       PetscInt j;
3691       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3692         for (j=0;j<*(ptr_idxs+1);j++) {
3693           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3694         }
3695       } else {
3696         /* TODO */
3697       }
3698       ptr_idxs += olengths_idxs[i];
3699     }
3700     if (new_local_nnz) {
3701       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3702       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3703       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3704       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3705       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3706       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3707     } else {
3708       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3709     }
3710     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3711   } else {
3712     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3713   }
3714 
3715   /* set values */
3716   ptr_vals = recv_buffer_vals;
3717   ptr_idxs = recv_buffer_idxs_local;
3718   for (i=0;i<n_recvs;i++) {
3719     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3720       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3721       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3722       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3723       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3724       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3725     } else {
3726       /* TODO */
3727     }
3728     ptr_idxs += olengths_idxs[i];
3729     ptr_vals += olengths_vals[i];
3730   }
3731   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3732   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3733   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3734   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3735   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3736   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3737 
3738 #if 0
3739   if (!restrict_comm) { /* check */
3740     Vec       lvec,rvec;
3741     PetscReal infty_error;
3742 
3743     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3744     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3745     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3746     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3747     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3748     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3749     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3750     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3751     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3752   }
3753 #endif
3754 
3755   /* assemble new additional is (if any) */
3756   if (nis) {
3757     PetscInt **temp_idxs,*count_is,j,psum;
3758 
3759     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3760     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3761     ptr_idxs = recv_buffer_idxs_is;
3762     psum = 0;
3763     for (i=0;i<n_recvs;i++) {
3764       for (j=0;j<nis;j++) {
3765         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3766         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3767         psum += plen;
3768         ptr_idxs += plen+1; /* shift pointer to received data */
3769       }
3770     }
3771     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3772     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3773     for (i=1;i<nis;i++) {
3774       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3775     }
3776     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3777     ptr_idxs = recv_buffer_idxs_is;
3778     for (i=0;i<n_recvs;i++) {
3779       for (j=0;j<nis;j++) {
3780         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3781         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3782         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3783         ptr_idxs += plen+1; /* shift pointer to received data */
3784       }
3785     }
3786     for (i=0;i<nis;i++) {
3787       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3788       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3789       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3790     }
3791     ierr = PetscFree(count_is);CHKERRQ(ierr);
3792     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3793     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3794   }
3795   /* free workspace */
3796   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3797   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3798   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3799   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3800   if (isdense) {
3801     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3802     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3803   } else {
3804     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3805   }
3806   if (nis) {
3807     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3808     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3809   }
3810   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3811   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3812   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3813   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3814   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3815   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3816   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3817   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3818   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3819   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3820   ierr = PetscFree(onodes);CHKERRQ(ierr);
3821   if (nis) {
3822     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3823     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3824     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3825   }
3826   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3827   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3828     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3829     for (i=0;i<nis;i++) {
3830       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3831     }
3832   }
3833   PetscFunctionReturn(0);
3834 }
3835 
3836 /* temporary hack into ksp private data structure */
3837 #include <petsc-private/kspimpl.h>
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3841 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3842 {
3843   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3844   PC_IS                  *pcis = (PC_IS*)pc->data;
3845   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3846   MatNullSpace           CoarseNullSpace=NULL;
3847   ISLocalToGlobalMapping coarse_islg;
3848   IS                     coarse_is,*isarray;
3849   PetscInt               i,im_active=-1,active_procs=-1;
3850   PetscInt               nis,nisdofs,nisneu;
3851   PC                     pc_temp;
3852   PCType                 coarse_pc_type;
3853   KSPType                coarse_ksp_type;
3854   PetscBool              multilevel_requested,multilevel_allowed;
3855   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3856   Mat                    t_coarse_mat_is;
3857   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3858   PetscMPIInt            all_procs;
3859   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3860   PetscBool              compute_vecs = PETSC_FALSE;
3861   PetscScalar            *array;
3862   PetscErrorCode         ierr;
3863 
3864   PetscFunctionBegin;
3865   /* Assign global numbering to coarse dofs */
3866   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3867     compute_vecs = PETSC_TRUE;
3868     PetscInt ocoarse_size;
3869     ocoarse_size = pcbddc->coarse_size;
3870     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3871     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3872     /* see if we can avoid some work */
3873     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3874       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3875         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3876         coarse_reuse = PETSC_FALSE;
3877       } else { /* we can safely reuse already computed coarse matrix */
3878         coarse_reuse = PETSC_TRUE;
3879       }
3880     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3881       coarse_reuse = PETSC_FALSE;
3882     }
3883     /* reset any subassembling information */
3884     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3885     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3886   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3887     coarse_reuse = PETSC_TRUE;
3888   }
3889 
3890   /* count "active" (i.e. with positive local size) and "void" processes */
3891   im_active = !!(pcis->n);
3892   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3893   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3894   void_procs = all_procs-active_procs;
3895   csin_type_simple = PETSC_TRUE;
3896   redist = PETSC_FALSE;
3897   if (pcbddc->current_level && void_procs) {
3898     csin_ml = PETSC_TRUE;
3899     ncoarse_ml = void_procs;
3900     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3901     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3902       csin_ds = PETSC_TRUE;
3903       ncoarse_ds = pcbddc->redistribute_coarse;
3904       redist = PETSC_TRUE;
3905     } else {
3906       csin_ds = PETSC_TRUE;
3907       ncoarse_ds = active_procs;
3908       redist = PETSC_TRUE;
3909     }
3910   } else {
3911     csin_ml = PETSC_FALSE;
3912     ncoarse_ml = all_procs;
3913     if (void_procs) {
3914       csin_ds = PETSC_TRUE;
3915       ncoarse_ds = void_procs;
3916       csin_type_simple = PETSC_FALSE;
3917     } else {
3918       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3919         csin_ds = PETSC_TRUE;
3920         ncoarse_ds = pcbddc->redistribute_coarse;
3921         redist = PETSC_TRUE;
3922       } else {
3923         csin_ds = PETSC_FALSE;
3924         ncoarse_ds = all_procs;
3925       }
3926     }
3927   }
3928 
3929   /*
3930     test if we can go multilevel: three conditions must be satisfied:
3931     - we have not exceeded the number of levels requested
3932     - we can actually subassemble the active processes
3933     - we can find a suitable number of MPI processes where we can place the subassembled problem
3934   */
3935   multilevel_allowed = PETSC_FALSE;
3936   multilevel_requested = PETSC_FALSE;
3937   if (pcbddc->current_level < pcbddc->max_levels) {
3938     multilevel_requested = PETSC_TRUE;
3939     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3940       multilevel_allowed = PETSC_FALSE;
3941     } else {
3942       multilevel_allowed = PETSC_TRUE;
3943     }
3944   }
3945   /* determine number of process partecipating to coarse solver */
3946   if (multilevel_allowed) {
3947     ncoarse = ncoarse_ml;
3948     csin = csin_ml;
3949     redist = PETSC_FALSE;
3950   } else {
3951     ncoarse = ncoarse_ds;
3952     csin = csin_ds;
3953   }
3954 
3955   /* creates temporary l2gmap and IS for coarse indexes */
3956   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3957   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3958 
3959   /* creates temporary MATIS object for coarse matrix */
3960   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3961   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3962   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3963   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3964 #if 0
3965   {
3966     PetscViewer viewer;
3967     char filename[256];
3968     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3969     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3970     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3971     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3972     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3973   }
3974 #endif
3975   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3976   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3977   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3978   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3979   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3980 
3981   /* compute dofs splitting and neumann boundaries for coarse dofs */
3982   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3983     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3984     const PetscInt         *idxs;
3985     ISLocalToGlobalMapping tmap;
3986 
3987     /* create map between primal indices (in local representative ordering) and local primal numbering */
3988     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3989     /* allocate space for temporary storage */
3990     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3991     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3992     /* allocate for IS array */
3993     nisdofs = pcbddc->n_ISForDofsLocal;
3994     nisneu = !!pcbddc->NeumannBoundariesLocal;
3995     nis = nisdofs + nisneu;
3996     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3997     /* dofs splitting */
3998     for (i=0;i<nisdofs;i++) {
3999       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4000       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4001       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4002       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4003       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4004       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4005       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4006       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4007     }
4008     /* neumann boundaries */
4009     if (pcbddc->NeumannBoundariesLocal) {
4010       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4011       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4012       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4013       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4014       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4015       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4016       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4017       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4018     }
4019     /* free memory */
4020     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4021     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4022     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4023   } else {
4024     nis = 0;
4025     nisdofs = 0;
4026     nisneu = 0;
4027     isarray = NULL;
4028   }
4029   /* destroy no longer needed map */
4030   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4031 
4032   /* restrict on coarse candidates (if needed) */
4033   coarse_mat_is = NULL;
4034   if (csin) {
4035     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4036       if (redist) {
4037         PetscMPIInt rank;
4038         PetscInt    spc,n_spc_p1,dest[1],destsize;
4039 
4040         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4041         spc = active_procs/ncoarse;
4042         n_spc_p1 = active_procs%ncoarse;
4043         if (im_active) {
4044           destsize = 1;
4045           if (rank > n_spc_p1*(spc+1)-1) {
4046             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4047           } else {
4048             dest[0] = rank/(spc+1);
4049           }
4050         } else {
4051           destsize = 0;
4052         }
4053         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4054       } else if (csin_type_simple) {
4055         PetscMPIInt rank;
4056         PetscInt    issize,isidx;
4057 
4058         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4059         if (im_active) {
4060           issize = 1;
4061           isidx = (PetscInt)rank;
4062         } else {
4063           issize = 0;
4064           isidx = -1;
4065         }
4066         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4067       } else { /* get a suitable subassembling pattern from MATIS code */
4068         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4069       }
4070 
4071       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4072       if (!redist || ncoarse <= void_procs) {
4073         PetscInt ncoarse_cand,tissize,*nisindices;
4074         PetscInt *coarse_candidates;
4075         const PetscInt* tisindices;
4076 
4077         /* get coarse candidates' ranks in pc communicator */
4078         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4079         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4080         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4081           if (!coarse_candidates[i]) {
4082             coarse_candidates[ncoarse_cand++]=i;
4083           }
4084         }
4085         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4086 
4087 
4088         if (pcbddc->dbg_flag) {
4089           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4090           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4091           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4092           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4093           for (i=0;i<ncoarse_cand;i++) {
4094             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4095           }
4096           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4097           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4098         }
4099         /* shift the pattern on coarse candidates */
4100         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4101         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4102         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4103         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4104         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4105         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4106         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4107       }
4108       if (pcbddc->dbg_flag) {
4109         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4110         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4111         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4112         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4113       }
4114     }
4115     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4116     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4117   } else {
4118     if (pcbddc->dbg_flag) {
4119       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4120       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4121       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4122     }
4123     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4124     coarse_mat_is = t_coarse_mat_is;
4125   }
4126 
4127   /* create local to global scatters for coarse problem */
4128   if (compute_vecs) {
4129     PetscInt lrows;
4130     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4131     if (coarse_mat_is) {
4132       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4133     } else {
4134       lrows = 0;
4135     }
4136     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4137     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4138     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4139     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4140     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4141   }
4142   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4143   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4144 
4145   /* set defaults for coarse KSP and PC */
4146   if (multilevel_allowed) {
4147     coarse_ksp_type = KSPRICHARDSON;
4148     coarse_pc_type = PCBDDC;
4149   } else {
4150     coarse_ksp_type = KSPPREONLY;
4151     coarse_pc_type = PCREDUNDANT;
4152   }
4153 
4154   /* print some info if requested */
4155   if (pcbddc->dbg_flag) {
4156     if (!multilevel_allowed) {
4157       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4158       if (multilevel_requested) {
4159         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4160       } else if (pcbddc->max_levels) {
4161         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4162       }
4163       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4164     }
4165   }
4166 
4167   /* create the coarse KSP object only once with defaults */
4168   if (coarse_mat_is) {
4169     MatReuse coarse_mat_reuse;
4170     PetscViewer dbg_viewer = NULL;
4171     if (pcbddc->dbg_flag) {
4172       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4173       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4174     }
4175     if (!pcbddc->coarse_ksp) {
4176       char prefix[256],str_level[16];
4177       size_t len;
4178       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4179       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4180       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4181       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4182       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4183       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4184       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4185       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4186       /* prefix */
4187       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4188       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4189       if (!pcbddc->current_level) {
4190         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4191         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4192       } else {
4193         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4194         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4195         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4196         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4197         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4198         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4199       }
4200       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4201       /* allow user customization */
4202       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4203       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4204     }
4205 
4206     /* get some info after set from options */
4207     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4208     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4209     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4210     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4211     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4212       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4213       isbddc = PETSC_FALSE;
4214     }
4215     if (isredundant) {
4216       KSP inner_ksp;
4217       PC inner_pc;
4218       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4219       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4220       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4221     }
4222 
4223     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4224     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4225     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4226     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4227     if (nisdofs) {
4228       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4229       for (i=0;i<nisdofs;i++) {
4230         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4231       }
4232     }
4233     if (nisneu) {
4234       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4235       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4236     }
4237 
4238     /* assemble coarse matrix */
4239     if (coarse_reuse) {
4240       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4241       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4242       coarse_mat_reuse = MAT_REUSE_MATRIX;
4243     } else {
4244       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4245     }
4246     if (isbddc || isnn) {
4247       if (pcbddc->coarsening_ratio > 1) {
4248         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4249           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4250           if (pcbddc->dbg_flag) {
4251             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4252             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4253             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4254             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4255           }
4256         }
4257         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4258       } else {
4259         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4260         coarse_mat = coarse_mat_is;
4261       }
4262     } else {
4263       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4264     }
4265     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4266 
4267     /* propagate symmetry info to coarse matrix */
4268     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4269     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4270 
4271     /* set operators */
4272     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4273     if (pcbddc->dbg_flag) {
4274       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4275     }
4276   } else { /* processes non partecipating to coarse solver (if any) */
4277     coarse_mat = 0;
4278   }
4279   ierr = PetscFree(isarray);CHKERRQ(ierr);
4280 #if 0
4281   {
4282     PetscViewer viewer;
4283     char filename[256];
4284     sprintf(filename,"coarse_mat.m");
4285     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4286     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4287     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4288     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4289   }
4290 #endif
4291 
4292   /* Compute coarse null space (special handling by BDDC only) */
4293   if (pcbddc->NullSpace) {
4294     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4295   }
4296 
4297   if (pcbddc->coarse_ksp) {
4298     Vec crhs,csol;
4299     PetscBool ispreonly;
4300     if (CoarseNullSpace) {
4301       if (isbddc) {
4302         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4303       } else {
4304         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4305       }
4306     }
4307     /* setup coarse ksp */
4308     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4309     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4310     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4311     /* hack */
4312     if (!csol) {
4313       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4314     }
4315     if (!crhs) {
4316       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4317     }
4318     /* Check coarse problem if in debug mode or if solving with an iterative method */
4319     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4320     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4321       KSP       check_ksp;
4322       KSPType   check_ksp_type;
4323       PC        check_pc;
4324       Vec       check_vec,coarse_vec;
4325       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4326       PetscInt  its;
4327       PetscBool compute_eigs;
4328       PetscReal *eigs_r,*eigs_c;
4329       PetscInt  neigs;
4330       const char *prefix;
4331 
4332       /* Create ksp object suitable for estimation of extreme eigenvalues */
4333       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4334       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4335       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4336       if (ispreonly) {
4337         check_ksp_type = KSPPREONLY;
4338         compute_eigs = PETSC_FALSE;
4339       } else {
4340         check_ksp_type = KSPGMRES;
4341         compute_eigs = PETSC_TRUE;
4342       }
4343       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4344       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4345       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4346       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4347       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4348       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4349       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4350       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4351       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4352       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4353       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4354       /* create random vec */
4355       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4356       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4357       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4358       if (CoarseNullSpace) {
4359         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4360       }
4361       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4362       /* solve coarse problem */
4363       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4364       if (CoarseNullSpace) {
4365         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4366       }
4367       /* set eigenvalue estimation if preonly has not been requested */
4368       if (compute_eigs) {
4369         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4370         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4371         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4372         lambda_max = eigs_r[neigs-1];
4373         lambda_min = eigs_r[0];
4374         if (pcbddc->use_coarse_estimates) {
4375           if (lambda_max>lambda_min) {
4376             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4377             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4378           }
4379         }
4380       }
4381 
4382       /* check coarse problem residual error */
4383       if (pcbddc->dbg_flag) {
4384         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4385         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4386         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4387         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4388         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4389         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4390         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4391         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4392         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4393         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4394         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4395         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4396         if (compute_eigs) {
4397           PetscReal lambda_max_s,lambda_min_s;
4398           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4399           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4400           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4401           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4402           for (i=0;i<neigs;i++) {
4403             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4404           }
4405         }
4406         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4407         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4408       }
4409       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4410       if (compute_eigs) {
4411         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4412         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4413       }
4414     }
4415   }
4416   /* print additional info */
4417   if (pcbddc->dbg_flag) {
4418     /* waits until all processes reaches this point */
4419     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4420     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4421     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4422   }
4423 
4424   /* free memory */
4425   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4426   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4427   PetscFunctionReturn(0);
4428 }
4429 
4430 #undef __FUNCT__
4431 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4432 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4433 {
4434   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4435   PC_IS*         pcis = (PC_IS*)pc->data;
4436   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4437   PetscInt       i,coarse_size;
4438   PetscInt       *local_primal_indices;
4439   PetscErrorCode ierr;
4440 
4441   PetscFunctionBegin;
4442   /* Compute global number of coarse dofs */
4443   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4444     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4445   }
4446   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4447 
4448   /* check numbering */
4449   if (pcbddc->dbg_flag) {
4450     PetscScalar coarsesum,*array;
4451     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4452 
4453     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4454     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4455     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4456     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4457     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4458     for (i=0;i<pcbddc->local_primal_size;i++) {
4459       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4460     }
4461     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4462     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4463     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4464     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4465     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4466     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4467     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4468     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4469     for (i=0;i<pcis->n;i++) {
4470       if (array[i] == 1.0) {
4471         set_error = PETSC_TRUE;
4472         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4473       }
4474     }
4475     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4476     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4477     for (i=0;i<pcis->n;i++) {
4478       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4479     }
4480     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4481     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4482     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4483     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4484     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4485     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4486     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4487       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4488       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4489       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4490       for (i=0;i<pcbddc->local_primal_size;i++) {
4491         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4492       }
4493       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4494     }
4495     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4496     if (set_error_reduced) {
4497       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4498     }
4499   }
4500   /* get back data */
4501   *coarse_size_n = coarse_size;
4502   *local_primal_indices_n = local_primal_indices;
4503   PetscFunctionReturn(0);
4504 }
4505 
4506 #undef __FUNCT__
4507 #define __FUNCT__ "PCBDDCGlobalToLocal"
4508 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4509 {
4510   IS             localis_t;
4511   PetscInt       i,lsize,*idxs,n;
4512   PetscScalar    *vals;
4513   PetscErrorCode ierr;
4514 
4515   PetscFunctionBegin;
4516   /* get indices in local ordering exploiting local to global map */
4517   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4518   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4519   for (i=0;i<lsize;i++) vals[i] = 1.0;
4520   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4521   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4522   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4523   if (idxs) { /* multilevel guard */
4524     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4525   }
4526   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4527   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4528   ierr = PetscFree(vals);CHKERRQ(ierr);
4529   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4530   /* now compute set in local ordering */
4531   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4532   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4533   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4534   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4535   for (i=0,lsize=0;i<n;i++) {
4536     if (PetscRealPart(vals[i]) > 0.5) {
4537       lsize++;
4538     }
4539   }
4540   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4541   for (i=0,lsize=0;i<n;i++) {
4542     if (PetscRealPart(vals[i]) > 0.5) {
4543       idxs[lsize++] = i;
4544     }
4545   }
4546   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4547   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4548   *localis = localis_t;
4549   PetscFunctionReturn(0);
4550 }
4551 
4552 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4553 #undef __FUNCT__
4554 #define __FUNCT__ "PCBDDCMatMult_Private"
4555 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4556 {
4557   PCBDDCChange_ctx change_ctx;
4558   PetscErrorCode   ierr;
4559 
4560   PetscFunctionBegin;
4561   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4562   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4563   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4564   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4565   PetscFunctionReturn(0);
4566 }
4567 
4568 #undef __FUNCT__
4569 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4570 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4571 {
4572   PCBDDCChange_ctx change_ctx;
4573   PetscErrorCode   ierr;
4574 
4575   PetscFunctionBegin;
4576   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4577   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4578   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4579   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4580   PetscFunctionReturn(0);
4581 }
4582 
4583 #undef __FUNCT__
4584 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4585 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4586 {
4587   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4588   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4589   PetscInt            *used_xadj,*used_adjncy;
4590   PetscBool           free_used_adj;
4591   PetscErrorCode      ierr;
4592 
4593   PetscFunctionBegin;
4594   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4595   free_used_adj = PETSC_FALSE;
4596   if (pcbddc->sub_schurs_layers == -1) {
4597     used_xadj = NULL;
4598     used_adjncy = NULL;
4599   } else {
4600     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4601       used_xadj = pcbddc->mat_graph->xadj;
4602       used_adjncy = pcbddc->mat_graph->adjncy;
4603     } else if (pcbddc->computed_rowadj) {
4604       used_xadj = pcbddc->mat_graph->xadj;
4605       used_adjncy = pcbddc->mat_graph->adjncy;
4606     } else {
4607       PetscBool      flg_row=PETSC_FALSE;
4608       const PetscInt *xadj,*adjncy;
4609       PetscInt       nvtxs;
4610 
4611       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4612       if (flg_row) {
4613         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4614         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4615         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4616         free_used_adj = PETSC_TRUE;
4617       } else {
4618         pcbddc->sub_schurs_layers = -1;
4619         used_xadj = NULL;
4620         used_adjncy = NULL;
4621       }
4622       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4623     }
4624   }
4625   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4626 
4627   /* free adjacency */
4628   if (free_used_adj) {
4629     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4630   }
4631   PetscFunctionReturn(0);
4632 }
4633 
4634 #undef __FUNCT__
4635 #define __FUNCT__ "PCBDDCInitSubSchurs"
4636 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4637 {
4638   PC_IS               *pcis=(PC_IS*)pc->data;
4639   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4640   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4641   PCBDDCGraph         graph;
4642   Mat                 S_j;
4643   PetscErrorCode      ierr;
4644 
4645   PetscFunctionBegin;
4646   /* attach interface graph for determining subsets */
4647   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4648     IS verticesIS;
4649 
4650     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4651     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4652     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4653     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4654     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4655     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4656 /*
4657     if (pcbddc->dbg_flag) {
4658       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4659     }
4660 */
4661   } else {
4662     graph = pcbddc->mat_graph;
4663   }
4664 
4665   /* Create Schur complement matrix */
4666   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4667   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4668 
4669   /* sub_schurs init */
4670   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4671   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4672   /* free graph struct */
4673   if (pcbddc->sub_schurs_rebuild) {
4674     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4675   }
4676   PetscFunctionReturn(0);
4677 }
4678