xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 9ab7bb1694f566b5d37b9a371adb0cd0134092b1)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31   if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
32     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
33   }
34 
35   /* max size of subsets */
36   mss = 0;
37   for (i=0;i<sub_schurs->n_subs;i++) {
38     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
39       PetscInt subset_size;
40       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
41       mss = PetscMax(mss,subset_size);
42     }
43   }
44 
45   /* min/max and threshold */
46   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
47   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
48   nmax = PetscMax(nmin,nmax);
49   if (pcbddc->adaptive_threshold > 1.0) {
50     thresh = 1.0/pcbddc->adaptive_threshold;
51   } else {
52     thresh = 1.0;
53   }
54 
55   /* allocate lapack workspace */
56   cum = cum2 = 0;
57   maxneigs = 0;
58   for (i=0;i<sub_schurs->n_subs;i++) {
59     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
60       PetscInt n,subset_size;
61       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
62       n = PetscMin(subset_size,nmax);
63       cum += subset_size*n;
64       cum2 += n;
65       maxneigs = PetscMax(maxneigs,n);
66     }
67   }
68 
69   if (mss) {
70     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
71       PetscBLASInt B_itype = 1;
72       PetscBLASInt B_N = mss;
73       PetscScalar  zero = 0.0;
74       PetscScalar  eps = 0.0; /* dlamch? */
75 
76       B_lwork = -1;
77       S = NULL;
78       St = NULL;
79       eigs = NULL;
80       eigv = NULL;
81       B_iwork = NULL;
82       B_ifail = NULL;
83       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
84 #if defined(PETSC_USE_COMPLEX)
85       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
86 #else
87       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
88 #endif
89       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
90       ierr = PetscFPTrapPop();CHKERRQ(ierr);
91     } else {
92         /* TODO */
93     }
94   } else {
95     lwork = 0;
96   }
97 
98   nv = 0;
99   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
100     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
101   }
102   ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr);
103   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
104                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
105 #if defined(PETSC_USE_COMPLEX)
106   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
107 #endif
108   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
109   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
110                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
111                       nv+cum,&pcbddc->adaptive_constraints_idxs,
112                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
113   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
114 
115   maxneigs = 0;
116   cum = cum2 = cumarray = 0;
117   if (sub_schurs->is_Ej_com) {
118     const PetscInt *idxs;
119 
120     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
121     for (cum=0;cum<nv;cum++) {
122       pcbddc->adaptive_constraints_n[cum] = 1;
123       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
124       pcbddc->adaptive_constraints_ptrs[cum] = cum;
125       pcbddc->adaptive_constraints_data[cum] = 1.0;
126     }
127     cum2 = cum;
128     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
129   }
130 
131   if (mss) { /* multilevel */
132     if (pcbddc->use_deluxe_scaling) {
133       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
134     } else {
135       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
136     }
137     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
138   }
139 
140   for (i=0;i<sub_schurs->n_subs;i++) {
141     PetscInt j,subset_size;
142 
143     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
144     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
145       const PetscInt *idxs;
146       PetscScalar    one = 1.0,zero=0.0;
147       PetscBLASInt   B_N;
148 
149       /* S should be copied since we need it for deluxe scaling */
150       if (sub_schurs->is_hermitian) {
151         PetscInt j;
152         for (j=0;j<subset_size;j++) {
153           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
154         }
155         for (j=0;j<subset_size;j++) {
156           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
157         }
158       } else {
159         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
160         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
161       }
162       /* is always this the right matrix? */
163       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
164 
165       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
166       /* St = Starray+cumarray; */
167 
168       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
169         PetscBLASInt B_itype = 1;
170         PetscBLASInt B_IL = 1, B_IU;
171         PetscScalar  eps = -1.0; /* dlamch? */
172         PetscInt     nmin_s;
173 
174         /* ask for eigenvalues lower than thresh */
175         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
176         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
177         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
178 #if defined(PETSC_USE_COMPLEX)
179         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
180 #else
181         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
182 #endif
183         ierr = PetscFPTrapPop();CHKERRQ(ierr);
184         if (B_ierr) {
185           if (B_ierr < 0 ) {
186             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
187           } else if (B_ierr <= B_N) {
188             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
189           } else {
190             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
191           }
192         }
193 
194         if (B_neigs > nmax) {
195           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax);
196           B_neigs = nmax;
197         }
198 
199         nmin_s = PetscMin(nmin,B_N);
200         if (B_neigs < nmin_s) {
201           PetscBLASInt B_neigs2;
202 
203           B_IL = B_neigs + 1;
204           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
205           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
206           if (sub_schurs->is_hermitian) {
207             PetscInt j;
208             for (j=0;j<subset_size;j++) {
209               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
210             }
211             for (j=0;j<subset_size;j++) {
212               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
213             }
214           } else {
215             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
216             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
217           }
218           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
219 #if defined(PETSC_USE_COMPLEX)
220           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
221 #else
222           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
223 #endif
224           ierr = PetscFPTrapPop();CHKERRQ(ierr);
225           B_neigs += B_neigs2;
226         }
227         if (B_ierr) {
228           if (B_ierr < 0 ) {
229             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
230           } else if (B_ierr <= B_N) {
231             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
232           } else {
233             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
234           }
235         }
236         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
237         for (j=0;j<B_neigs;j++) {
238           if (eigs[j] == 0.0) {
239             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
240           } else {
241             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
242           }
243         }
244       } else {
245           /* TODO */
246       }
247       maxneigs = PetscMax(B_neigs,maxneigs);
248       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
249 
250       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
251       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N));
252       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
253 #if 0
254       PetscInt ii;
255       for (ii=0;ii<B_neigs;ii++) {
256         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N);
257         for (j=0;j<B_N;j++) {
258           PetscPrintf(PETSC_COMM_SELF,"[%d]       %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]);
259         }
260       }
261 #endif
262       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
263       for (j=0;j<B_neigs;j++) {
264 #if 0
265         {
266           PetscBLASInt Blas_N,Blas_one = 1.0;
267           PetscScalar norm;
268           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
269           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
270           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
271             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
272           } else {
273             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
274           }
275           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
276         }
277 #endif
278         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
279         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
280         cum2 += subset_size;
281       }
282       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
283     }
284     /* shift for next computation */
285     cumarray += subset_size*subset_size;
286   }
287   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
288   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
289 
290   if (mss) {
291     if (pcbddc->use_deluxe_scaling) {
292       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
293     } else {
294       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
295     }
296     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
297   }
298   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
299 #if defined(PETSC_USE_COMPLEX)
300   ierr = PetscFree(rwork);CHKERRQ(ierr);
301 #endif
302   if (pcbddc->dbg_flag) {
303     PetscInt maxneigs_r;
304     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
305     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
306   }
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "PCBDDCSetUpSolvers"
312 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
313 {
314   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
315   PetscScalar    *coarse_submat_vals;
316   PetscErrorCode ierr;
317 
318   PetscFunctionBegin;
319   /* Setup local scatters R_to_B and (optionally) R_to_D */
320   /* PCBDDCSetUpLocalWorkVectors should be called first! */
321   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
322 
323   /* Setup local neumann solver ksp_R */
324   /* PCBDDCSetUpLocalScatters should be called first! */
325   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
326 
327   /* Change global null space passed in by the user if change of basis has been requested */
328   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
329     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
330   }
331 
332   /*
333      Setup local correction and local part of coarse basis.
334      Gives back the dense local part of the coarse matrix in column major ordering
335   */
336   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
337 
338   /* Compute total number of coarse nodes and setup coarse solver */
339   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
340 
341   /* free */
342   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 #undef __FUNCT__
347 #define __FUNCT__ "PCBDDCResetCustomization"
348 PetscErrorCode PCBDDCResetCustomization(PC pc)
349 {
350   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
351   PetscErrorCode ierr;
352 
353   PetscFunctionBegin;
354   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
355   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
356   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
357   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
358   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
359   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
360   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
361   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
362   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
363   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
364   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
365   PetscFunctionReturn(0);
366 }
367 
368 #undef __FUNCT__
369 #define __FUNCT__ "PCBDDCResetTopography"
370 PetscErrorCode PCBDDCResetTopography(PC pc)
371 {
372   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
373   PetscErrorCode ierr;
374 
375   PetscFunctionBegin;
376   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
377   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
378   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
379   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
380   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
381   PetscFunctionReturn(0);
382 }
383 
384 #undef __FUNCT__
385 #define __FUNCT__ "PCBDDCResetSolvers"
386 PetscErrorCode PCBDDCResetSolvers(PC pc)
387 {
388   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
389   PetscScalar    *array;
390   PetscErrorCode ierr;
391 
392   PetscFunctionBegin;
393   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
394   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
395   ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
396   ierr = PetscFree(array);CHKERRQ(ierr);
397   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
398   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
399   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
400   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
401   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
402   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
403   if (pcbddc->local_auxmat2) {
404     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
405     ierr = PetscFree(array);CHKERRQ(ierr);
406   }
407   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
408   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
409   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
410   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
411   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
412   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
413   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
414   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
415   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
416   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
417   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
418   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
419   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
420   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
421   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
422   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
428 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
429 {
430   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
431   PC_IS          *pcis = (PC_IS*)pc->data;
432   VecType        impVecType;
433   PetscInt       n_constraints,n_R,old_size;
434   PetscErrorCode ierr;
435 
436   PetscFunctionBegin;
437   if (!pcbddc->ConstraintMatrix) {
438     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
439   }
440   /* get sizes */
441   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
442   n_R = pcis->n-pcbddc->n_actual_vertices;
443   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
444   /* local work vectors (try to avoid unneeded work)*/
445   /* R nodes */
446   old_size = -1;
447   if (pcbddc->vec1_R) {
448     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
449   }
450   if (n_R != old_size) {
451     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
452     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
453     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
454     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
455     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
456     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
457   }
458   /* local primal dofs */
459   old_size = -1;
460   if (pcbddc->vec1_P) {
461     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
462   }
463   if (pcbddc->local_primal_size != old_size) {
464     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
465     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
466     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
467     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
468   }
469   /* local explicit constraints */
470   old_size = -1;
471   if (pcbddc->vec1_C) {
472     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
473   }
474   if (n_constraints && n_constraints != old_size) {
475     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
476     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
477     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
478     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "PCBDDCSetUpCorrection"
485 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
486 {
487   PetscErrorCode         ierr;
488   /* pointers to pcis and pcbddc */
489   PC_IS*                 pcis = (PC_IS*)pc->data;
490   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
491   /* submatrices of local problem */
492   Mat                    A_RV,A_VR,A_VV;
493   /* submatrices of local coarse problem */
494   Mat                    S_VV,S_CV,S_VC,S_CC;
495   /* working matrices */
496   Mat                    C_CR;
497   /* additional working stuff */
498   PC                     pc_R;
499   Mat                    F;
500   PetscBool              isLU,isCHOL,isILU;
501 
502   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
503   PetscScalar            *work;
504   PetscInt               *idx_V_B;
505   PetscInt               n,n_vertices,n_constraints;
506   PetscInt               i,n_R,n_D,n_B;
507   PetscBool              unsymmetric_check;
508   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
509   MatType                impMatType;
510   /* some shortcuts to scalars */
511   PetscScalar            one=1.0,m_one=-1.0;
512 
513   PetscFunctionBegin;
514   /* get number of vertices (corners plus constraints with change of basis)
515      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
516   n_vertices = pcbddc->n_actual_vertices;
517   n_constraints = pcbddc->local_primal_size-n_vertices;
518   /* Set Non-overlapping dimensions */
519   n_B = pcis->n_B; n_D = pcis->n - n_B;
520   n_R = pcis->n-n_vertices;
521 
522   /* Set types for local objects needed by BDDC precondtioner */
523   impMatType = MATSEQDENSE;
524 
525   /* vertices in boundary numbering */
526   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
527   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
528   if (i != n_vertices) {
529     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
530   }
531 
532   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
533   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
534   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
535   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
536   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
537   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
538   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
539   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
540   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
541   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
542 
543   unsymmetric_check = PETSC_FALSE;
544   /* allocate workspace */
545   n = 0;
546   if (n_constraints) {
547     n += n_R*n_constraints;
548   }
549   if (n_vertices) {
550     n = PetscMax(2*n_R*n_vertices,n);
551   }
552   if (!pcbddc->issym) {
553     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
554     unsymmetric_check = PETSC_TRUE;
555   }
556   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
557 
558   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
559   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
560   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
561   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
562   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
563   if (isLU || isILU || isCHOL) {
564     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
565   } else {
566     F = NULL;
567   }
568 
569   /* Precompute stuffs needed for preprocessing and application of BDDC*/
570   if (n_constraints) {
571     Mat M1,M2,M3;
572     IS  is_aux;
573     /* see if we can save some allocations */
574     if (pcbddc->local_auxmat2) {
575       PetscInt on_R,on_constraints;
576       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
577       if (on_R != n_R || on_constraints != n_constraints) {
578         PetscScalar *marray;
579 
580         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
581         ierr = PetscFree(marray);CHKERRQ(ierr);
582         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
583         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
584       }
585     }
586     /* auxiliary matrices */
587     if (!pcbddc->local_auxmat2) {
588       PetscScalar *marray;
589 
590       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
591       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
592       marray += n_R*n_constraints;
593       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
594     }
595 
596     /* Extract constraints on R nodes: C_{CR}  */
597     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
598     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
599     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
600 
601     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
602     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
603     for (i=0;i<n_constraints;i++) {
604       const PetscScalar *row_cmat_values;
605       const PetscInt    *row_cmat_indices;
606       PetscInt          size_of_constraint,j;
607 
608       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
609       for (j=0;j<size_of_constraint;j++) {
610         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
611       }
612       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
613     }
614     if (F) {
615       Mat B;
616 
617       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
618       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
619       ierr = MatDestroy(&B);CHKERRQ(ierr);
620     } else {
621       PetscScalar *xarray;
622       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
623       for (i=0;i<n_constraints;i++) {
624         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
625         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
626         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
627         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
628         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
629       }
630       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
631     }
632 
633     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
634     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
635     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
636     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
637     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
638     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
639     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
640     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
641     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
642     ierr = MatDestroy(&M2);CHKERRQ(ierr);
643     ierr = MatDestroy(&M3);CHKERRQ(ierr);
644     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
645     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
646     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
647     ierr = MatDestroy(&M1);CHKERRQ(ierr);
648   }
649   /* Get submatrices from subdomain matrix */
650   if (n_vertices) {
651     Mat       newmat;
652     IS        is_aux;
653     PetscInt  ibs,mbs;
654     PetscBool issbaij;
655 
656     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
657     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
658     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
659     if (ibs != mbs) { /* need to convert to SEQAIJ */
660       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
661       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
662       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
663       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
664       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
665     } else {
666       /* this is safe */
667       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
668       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
669       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
670         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
671         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
672         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
673         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
674         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
675       } else {
676         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
677         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
678       }
679     }
680     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
681   }
682 
683   /* Matrix of coarse basis functions (local) */
684   if (pcbddc->coarse_phi_B) {
685     PetscInt on_B,on_primal,on_D=n_D;
686     if (pcbddc->coarse_phi_D) {
687       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
688     }
689     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
690     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
691       PetscScalar *marray;
692 
693       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
694       ierr = PetscFree(marray);CHKERRQ(ierr);
695       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
696       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
697       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
698       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
699     }
700   }
701 
702   if (!pcbddc->coarse_phi_B) {
703     PetscScalar *marray;
704 
705     n = n_B*pcbddc->local_primal_size;
706     if (pcbddc->switch_static || pcbddc->dbg_flag) {
707       n += n_D*pcbddc->local_primal_size;
708     }
709     if (!pcbddc->issym) {
710       n *= 2;
711     }
712     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
713     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
714     n = n_B*pcbddc->local_primal_size;
715     if (pcbddc->switch_static || pcbddc->dbg_flag) {
716       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
717       n += n_D*pcbddc->local_primal_size;
718     }
719     if (!pcbddc->issym) {
720       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
721       if (pcbddc->switch_static || pcbddc->dbg_flag) {
722         n = n_B*pcbddc->local_primal_size;
723         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
724       }
725     } else {
726       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
727       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
728       if (pcbddc->switch_static || pcbddc->dbg_flag) {
729         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
730         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
731       }
732     }
733   }
734   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
735   /* vertices */
736   if (n_vertices) {
737 
738     if (n_R) {
739       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
740       PetscBLASInt B_N,B_one = 1;
741       PetscScalar  *x,*y;
742 
743       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
744       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
745       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
746       if (F) {
747         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
748       } else {
749         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
750         for (i=0;i<n_vertices;i++) {
751           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
752           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
753           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
754           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
755           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
756         }
757         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
758       }
759       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
760       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
761       if (n_constraints) {
762         Mat B;
763         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
764         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
765         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
766         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
767         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
768         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
769         ierr = MatDestroy(&B);CHKERRQ(ierr);
770       }
771       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
772       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
773       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
774       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
775       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
776       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
777       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
778       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
779       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
780       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
781       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
782       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
783     } else {
784       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
785       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
786     }
787     /* coarse basis functions */
788     for (i=0;i<n_vertices;i++) {
789       PetscScalar *y;
790 
791       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
792       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
793       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
794       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
795       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
796       y[n_B*i+idx_V_B[i]] = 1.0;
797       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
798       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
799 
800       if (pcbddc->switch_static || pcbddc->dbg_flag) {
801         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
802         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
803         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
804         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
805         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
806         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
807       }
808       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
809     }
810     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
811     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
812   }
813 
814   if (n_constraints) {
815     Mat B;
816 
817     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
818     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
819     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
820     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
821     if (n_vertices) {
822       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
823     }
824     ierr = MatDestroy(&B);CHKERRQ(ierr);
825     /* coarse basis functions */
826     for (i=0;i<n_constraints;i++) {
827       PetscScalar *y;
828 
829       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
830       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
831       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
832       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
833       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
834       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
835       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
836 
837       if (pcbddc->switch_static || pcbddc->dbg_flag) {
838         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
839         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
840         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
841         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
842         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
843         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
844       }
845       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
846     }
847   }
848 
849   /* compute other basis functions for non-symmetric problems */
850   if (!pcbddc->issym) {
851     Mat B,X;
852 
853     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
854 
855     if (n_constraints) {
856       Mat S_CCT,B_C;
857 
858       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
859       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
860       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
861       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
862       if (n_vertices) {
863         Mat B_V,S_VCT;
864 
865         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
866         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
867         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
868         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
869         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
870       }
871       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
872     }
873     if (n_vertices && n_R) {
874       Mat          A_VRT;
875       PetscBLASInt B_N,B_one = 1;
876 
877       if (!n_constraints) { /* if there are no constraints, reset work */
878         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
879       }
880       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
881       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
882       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
883       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
884       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
885     }
886 
887     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
888     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
889       for (i=0;i<pcbddc->local_primal_size;i++) {
890         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
891         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
892         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
893         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
894         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
895       }
896     } else {
897       for (i=0;i<pcbddc->local_primal_size;i++) {
898         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
899         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
900         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
901         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
902         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
903       }
904     }
905     ierr = MatDestroy(&B);CHKERRQ(ierr);
906     /* coarse basis functions */
907     for (i=0;i<pcbddc->local_primal_size;i++) {
908       PetscScalar *y;
909 
910       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
911       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
912       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
913       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915       if (i<n_vertices) {
916         y[n_B*i+idx_V_B[i]] = 1.0;
917       }
918       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
919       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
920 
921       if (pcbddc->switch_static || pcbddc->dbg_flag) {
922         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
923         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
924         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
925         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
927         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
928       }
929       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
930     }
931     ierr = MatDestroy(&X);CHKERRQ(ierr);
932   }
933 
934   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
935   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
936   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
937   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
938   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
939   /* Checking coarse_sub_mat and coarse basis functios */
940   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
941   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
942   if (pcbddc->dbg_flag) {
943     Mat         coarse_sub_mat;
944     Mat         AUXMAT,TM1,TM2,TM3,TM4;
945     Mat         coarse_phi_D,coarse_phi_B;
946     Mat         coarse_psi_D,coarse_psi_B;
947     Mat         A_II,A_BB,A_IB,A_BI;
948     MatType     checkmattype=MATSEQAIJ;
949     PetscReal   real_value;
950 
951     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
952     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
953     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
954     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
955     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
956     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
957     if (unsymmetric_check) {
958       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
959       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
960     }
961     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
962 
963     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
964     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
965     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
966     if (unsymmetric_check) {
967       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
968       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
969       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
970       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
971       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
972       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
973       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
974       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
975       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
976       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
977       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
978       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
979     } else {
980       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
981       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
982       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
983       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
984       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
985       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
986       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
987       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
988     }
989     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
990     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
991     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
992     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
993     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
994     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
995 
996     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
997     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
998     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
999     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1000     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1001     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1002     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1003     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1004     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1005     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1006     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1007     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1008     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1009     if (unsymmetric_check) {
1010       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1011       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1012     }
1013     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1014   }
1015 
1016   /* free memory */
1017   ierr = PetscFree(work);CHKERRQ(ierr);
1018   if (n_vertices) {
1019     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1020   }
1021   if (n_constraints) {
1022     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1023   }
1024   /* get back data */
1025   *coarse_submat_vals_n = coarse_submat_vals;
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1031 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B)
1032 {
1033   Mat            *work_mat;
1034   IS             isrow_s,iscol_s;
1035   PetscBool      rsorted,csorted;
1036   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1037   PetscErrorCode ierr;
1038 
1039   PetscFunctionBegin;
1040   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1041   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1042   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1043   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1044 
1045   if (!rsorted) {
1046     const PetscInt *idxs;
1047     PetscInt *idxs_sorted,i;
1048 
1049     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1050     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1051     for (i=0;i<rsize;i++) {
1052       idxs_perm_r[i] = i;
1053     }
1054     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1055     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1056     for (i=0;i<rsize;i++) {
1057       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1058     }
1059     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1060     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1061   } else {
1062     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1063     isrow_s = isrow;
1064   }
1065 
1066   if (!csorted) {
1067     if (isrow == iscol) {
1068       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1069       iscol_s = isrow_s;
1070     } else {
1071       const PetscInt *idxs;
1072       PetscInt *idxs_sorted,i;
1073 
1074       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1075       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1076       for (i=0;i<csize;i++) {
1077         idxs_perm_c[i] = i;
1078       }
1079       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1080       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1081       for (i=0;i<csize;i++) {
1082         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1083       }
1084       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1085       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1086     }
1087   } else {
1088     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1089     iscol_s = iscol;
1090   }
1091 
1092   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr);
1093 
1094   if (!rsorted || !csorted) {
1095     Mat      new_mat;
1096     IS       is_perm_r,is_perm_c;
1097 
1098     if (!rsorted) {
1099       PetscInt *idxs_r,i;
1100       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1101       for (i=0;i<rsize;i++) {
1102         idxs_r[idxs_perm_r[i]] = i;
1103       }
1104       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1105       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1106     } else {
1107       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1108     }
1109     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1110 
1111     if (!csorted) {
1112       if (isrow_s == iscol_s) {
1113         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1114         is_perm_c = is_perm_r;
1115       } else {
1116         PetscInt *idxs_c,i;
1117         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1118         for (i=0;i<csize;i++) {
1119           idxs_c[idxs_perm_c[i]] = i;
1120         }
1121         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1122         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1123       }
1124     } else {
1125       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1126     }
1127     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1128 
1129     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1130     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1131     work_mat[0] = new_mat;
1132     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1133     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1134   }
1135 
1136   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1137   *B = work_mat[0];
1138   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1139   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1140   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1146 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1147 {
1148   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1149   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1150   Mat            new_mat;
1151   IS             is_local,is_global;
1152   PetscInt       local_size;
1153   PetscBool      isseqaij;
1154   PetscErrorCode ierr;
1155 
1156   PetscFunctionBegin;
1157   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1158   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1159   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1160   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1161   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1162   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr);
1163   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1164 
1165   /* check */
1166   if (pcbddc->dbg_flag) {
1167     Vec       x,x_change;
1168     PetscReal error;
1169 
1170     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1171     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1172     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1173     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1174     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1175     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1176     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1177     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1178     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1179     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1180     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1181     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1182     ierr = VecDestroy(&x);CHKERRQ(ierr);
1183     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1184   }
1185 
1186   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1187   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1188   if (isseqaij) {
1189     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1190   } else {
1191     Mat work_mat;
1192     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1193     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1194     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1195   }
1196   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1197   /*
1198   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1199   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1200   */
1201   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1207 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1208 {
1209   PC_IS*         pcis = (PC_IS*)(pc->data);
1210   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1211   IS             is_aux1,is_aux2;
1212   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1213   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1214   PetscInt       vbs,bs;
1215   PetscBT        bitmask;
1216   PetscErrorCode ierr;
1217 
1218   PetscFunctionBegin;
1219   /*
1220     No need to setup local scatters if
1221       - primal space is unchanged
1222         AND
1223       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1224         AND
1225       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1226   */
1227   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1228     PetscFunctionReturn(0);
1229   }
1230   /* destroy old objects */
1231   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1232   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1233   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1234   /* Set Non-overlapping dimensions */
1235   n_B = pcis->n_B; n_D = pcis->n - n_B;
1236   n_vertices = pcbddc->n_actual_vertices;
1237   /* create auxiliary bitmask */
1238   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1239   for (i=0;i<n_vertices;i++) {
1240     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1241   }
1242 
1243   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1244   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1245   for (i=0, n_R=0; i<pcis->n; i++) {
1246     if (!PetscBTLookup(bitmask,i)) {
1247       idx_R_local[n_R] = i;
1248       n_R++;
1249     }
1250   }
1251 
1252   /* Block code */
1253   vbs = 1;
1254   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1255   if (bs>1 && !(n_vertices%bs)) {
1256     PetscBool is_blocked = PETSC_TRUE;
1257     PetscInt  *vary;
1258     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1259     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1260     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1261     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1262     for (i=0; i<n_vertices; i++) {
1263       if (vary[i]!=0 && vary[i]!=bs) {
1264         is_blocked = PETSC_FALSE;
1265         break;
1266       }
1267     }
1268     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1269       vbs = bs;
1270       for (i=0;i<n_R/vbs;i++) {
1271         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1272       }
1273     }
1274     ierr = PetscFree(vary);CHKERRQ(ierr);
1275   }
1276   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1277   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1278 
1279   /* print some info if requested */
1280   if (pcbddc->dbg_flag) {
1281     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1282     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1283     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1284     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1285     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1286     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1287     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1288     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1289   }
1290 
1291   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1292   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1293   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1294   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1295   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1296   for (i=0; i<n_D; i++) {
1297     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1298   }
1299   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1300   for (i=0, j=0; i<n_R; i++) {
1301     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1302       aux_array1[j++] = i;
1303     }
1304   }
1305   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1306   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1307   for (i=0, j=0; i<n_B; i++) {
1308     if (!PetscBTLookup(bitmask,is_indices[i])) {
1309       aux_array2[j++] = i;
1310     }
1311   }
1312   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1313   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1314   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1315   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1316   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1317 
1318   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1319     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1320     for (i=0, j=0; i<n_R; i++) {
1321       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1322         aux_array1[j++] = i;
1323       }
1324     }
1325     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1326     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1327     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1328   }
1329   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1330   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 
1335 #undef __FUNCT__
1336 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1337 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1338 {
1339   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1340   PC_IS          *pcis = (PC_IS*)pc->data;
1341   PC             pc_temp;
1342   Mat            A_RR;
1343   MatReuse       reuse;
1344   PetscScalar    m_one = -1.0;
1345   PetscReal      value;
1346   PetscInt       n_D,n_R,ibs,mbs;
1347   PetscBool      use_exact,use_exact_reduced,issbaij;
1348   PetscErrorCode ierr;
1349   /* prefixes stuff */
1350   char           dir_prefix[256],neu_prefix[256],str_level[16];
1351   size_t         len;
1352 
1353   PetscFunctionBegin;
1354 
1355   /* compute prefixes */
1356   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1357   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1358   if (!pcbddc->current_level) {
1359     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1360     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1361     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1362     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1363   } else {
1364     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1365     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1366     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1367     len -= 15; /* remove "pc_bddc_coarse_" */
1368     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1369     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1370     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1371     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1372     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1373     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1374     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1375     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1376   }
1377 
1378   /* DIRICHLET PROBLEM */
1379   if (dirichlet) {
1380     /* Matrix for Dirichlet problem is pcis->A_II */
1381     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1382     if (!pcbddc->ksp_D) { /* create object if not yet build */
1383       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1384       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1385       /* default */
1386       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1387       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1388       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1389       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1390       if (issbaij) {
1391         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1392       } else {
1393         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1394       }
1395       /* Allow user's customization */
1396       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1397       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1398     }
1399     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1400     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1401     if (!n_D) {
1402       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1403       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1404     }
1405     /* Set Up KSP for Dirichlet problem of BDDC */
1406     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1407     /* set ksp_D into pcis data */
1408     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1409     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1410     pcis->ksp_D = pcbddc->ksp_D;
1411   }
1412 
1413   /* NEUMANN PROBLEM */
1414   A_RR = 0;
1415   if (neumann) {
1416     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1417     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1418     if (pcbddc->ksp_R) { /* already created ksp */
1419       PetscInt nn_R;
1420       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1421       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1422       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1423       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1424         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1425         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1426         reuse = MAT_INITIAL_MATRIX;
1427       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1428         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1429           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1430           reuse = MAT_INITIAL_MATRIX;
1431         } else { /* safe to reuse the matrix */
1432           reuse = MAT_REUSE_MATRIX;
1433         }
1434       }
1435       /* last check */
1436       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1437         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1438         reuse = MAT_INITIAL_MATRIX;
1439       }
1440     } else { /* first time, so we need to create the matrix */
1441       reuse = MAT_INITIAL_MATRIX;
1442     }
1443     /* extract A_RR */
1444     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1445     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1446     if (ibs != mbs) {
1447       Mat newmat;
1448       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1449       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1450       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1451     } else {
1452       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1453     }
1454     if (!pcbddc->ksp_R) { /* create object if not present */
1455       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1456       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1457       /* default */
1458       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1459       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1460       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1461       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1462       if (issbaij) {
1463         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1464       } else {
1465         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1466       }
1467       /* Allow user's customization */
1468       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1469       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1470     }
1471     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1472     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1473     if (!n_R) {
1474       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1475       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1476     }
1477     /* Set Up KSP for Neumann problem of BDDC */
1478     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1479   }
1480 
1481   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1482   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1483     if (pcbddc->dbg_flag) {
1484       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1485       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1486       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1487     }
1488     if (dirichlet) { /* Dirichlet */
1489       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1490       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1491       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1492       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1493       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1494       /* need to be adapted? */
1495       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1496       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1497       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1498       /* print info */
1499       if (pcbddc->dbg_flag) {
1500         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1501         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1502       }
1503       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1504         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1505       }
1506     }
1507     if (neumann) { /* Neumann */
1508       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1509       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1510       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1511       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1512       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1513       /* need to be adapted? */
1514       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1515       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1516       /* print info */
1517       if (pcbddc->dbg_flag) {
1518         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1519         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1520       }
1521       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1522         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1523       }
1524     }
1525   }
1526   /* free Neumann problem's matrix */
1527   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1533 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1534 {
1535   PetscErrorCode ierr;
1536   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1537 
1538   PetscFunctionBegin;
1539   if (applytranspose) {
1540     if (pcbddc->local_auxmat1) {
1541       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1542       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1543     }
1544     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1545   } else {
1546     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1547     if (pcbddc->local_auxmat1) {
1548       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1549       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1550     }
1551   }
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1556 #undef __FUNCT__
1557 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1558 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1559 {
1560   PetscErrorCode ierr;
1561   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1562   PC_IS*            pcis = (PC_IS*)  (pc->data);
1563   const PetscScalar zero = 0.0;
1564 
1565   PetscFunctionBegin;
1566   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1567   if (applytranspose) {
1568     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1569     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1570   } else {
1571     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1572     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1573   }
1574   /* start communications from local primal nodes to rhs of coarse solver */
1575   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1576   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1577   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1578 
1579   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1580   /* TODO remove null space when doing multilevel */
1581   if (pcbddc->coarse_ksp) {
1582     if (applytranspose) {
1583       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1584     } else {
1585       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1586     }
1587   }
1588 
1589   /* Local solution on R nodes */
1590   if (pcis->n) {
1591     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1592     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1593     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1594     if (pcbddc->switch_static) {
1595       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1596       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1597     }
1598     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1599     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1600     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1601     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1602     if (pcbddc->switch_static) {
1603       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605     }
1606   }
1607 
1608   /* communications from coarse sol to local primal nodes */
1609   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1610   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1611 
1612   /* Sum contributions from two levels */
1613   if (applytranspose) {
1614     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1615     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1616   } else {
1617     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1618     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1619   }
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1624 #undef __FUNCT__
1625 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1626 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1627 {
1628   PetscErrorCode ierr;
1629   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1630   PetscScalar    *array,*array2;
1631   Vec            from,to;
1632 
1633   PetscFunctionBegin;
1634   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1635     from = pcbddc->coarse_vec;
1636     to = pcbddc->vec1_P;
1637     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1638       Vec tvec;
1639       PetscInt lsize;
1640       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1641       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1642       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1643       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1644       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1645       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1646       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1647     }
1648   } else { /* from local to global -> put data in coarse right hand side */
1649     from = pcbddc->vec1_P;
1650     to = pcbddc->coarse_vec;
1651   }
1652   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1658 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1659 {
1660   PetscErrorCode ierr;
1661   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1662   PetscScalar    *array,*array2;
1663   Vec            from,to;
1664 
1665   PetscFunctionBegin;
1666   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1667     from = pcbddc->coarse_vec;
1668     to = pcbddc->vec1_P;
1669   } else { /* from local to global -> put data in coarse right hand side */
1670     from = pcbddc->vec1_P;
1671     to = pcbddc->coarse_vec;
1672   }
1673   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1674   if (smode == SCATTER_FORWARD) {
1675     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1676       Vec tvec;
1677       PetscInt lsize;
1678       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1679       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1680       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1681       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1682       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1683       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1684       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1685     }
1686   }
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 /* uncomment for testing purposes */
1691 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1692 #undef __FUNCT__
1693 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1694 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1695 {
1696   PetscErrorCode    ierr;
1697   PC_IS*            pcis = (PC_IS*)(pc->data);
1698   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1699   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1700   /* one and zero */
1701   PetscScalar       one=1.0,zero=0.0;
1702   /* space to store constraints and their local indices */
1703   PetscScalar       *temp_quadrature_constraint;
1704   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1705   /* iterators */
1706   PetscInt          i,j,k,total_counts,temp_start_ptr;
1707   /* stuff to store connected components stored in pcbddc->mat_graph */
1708   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1709   PetscInt          n_ISForFaces,n_ISForEdges;
1710   /* near null space stuff */
1711   MatNullSpace      nearnullsp;
1712   const Vec         *nearnullvecs;
1713   Vec               *localnearnullsp;
1714   PetscBool         nnsp_has_cnst;
1715   PetscInt          nnsp_size;
1716   PetscScalar       *array;
1717   /* BLAS integers */
1718   PetscBLASInt      lwork,lierr;
1719   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1720   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1721   /* LAPACK working arrays for SVD or POD */
1722   PetscBool         skip_lapack;
1723   PetscScalar       *work;
1724   PetscReal         *singular_vals;
1725 #if defined(PETSC_USE_COMPLEX)
1726   PetscReal         *rwork;
1727 #endif
1728 #if defined(PETSC_MISSING_LAPACK_GESVD)
1729   PetscBLASInt      Blas_one_2=1;
1730   PetscScalar       *temp_basis,*correlation_mat;
1731 #else
1732   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1733   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1734 #endif
1735   /* reuse */
1736   PetscInt          olocal_primal_size;
1737   PetscInt          *oprimal_indices_local_idxs;
1738   /* change of basis */
1739   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1740   PetscBool         boolforchange,qr_needed;
1741   PetscBT           touched,change_basis,qr_needed_idx;
1742   /* auxiliary stuff */
1743   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1744   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1745   PetscScalar       *temp_quadrature_constraint_work=NULL;
1746   /* some quantities */
1747   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1748   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1749 
1750 
1751   PetscFunctionBegin;
1752   /* Destroy Mat objects computed previously */
1753   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1754   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1755   /* TODO synch with adaptive selection */
1756   /* Get index sets for faces, edges and vertices from graph */
1757   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1758   /* free unneeded index sets */
1759   if (!pcbddc->use_vertices) {
1760     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1761   }
1762   if (!pcbddc->use_edges) {
1763     for (i=0;i<n_ISForEdges;i++) {
1764       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1765     }
1766     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1767     n_ISForEdges = 0;
1768   }
1769   if (!pcbddc->use_faces) {
1770     for (i=0;i<n_ISForFaces;i++) {
1771       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1772     }
1773     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1774     n_ISForFaces = 0;
1775   }
1776   /* HACKS (the following two blocks of code) */
1777   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1778     pcbddc->use_change_of_basis = PETSC_TRUE;
1779     if (!ISForEdges) {
1780       pcbddc->use_change_on_faces = PETSC_TRUE;
1781     }
1782   }
1783   if (pcbddc->NullSpace) {
1784     /* use_change_of_basis should be consistent among processors */
1785     PetscBool tbool[2],gbool[2];
1786     tbool [0] = pcbddc->use_change_of_basis;
1787     tbool [1] = pcbddc->use_change_on_faces;
1788     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1789     pcbddc->use_change_of_basis = gbool[0];
1790     pcbddc->use_change_on_faces = gbool[1];
1791   }
1792   /* print some info */
1793   if (pcbddc->dbg_flag) {
1794     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1795     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1796     i = 0;
1797     if (ISForVertices) {
1798       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1799     }
1800     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1801     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1802     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1803     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1804   }
1805 
1806   if (!pcbddc->adaptive_selection) {
1807     /* check if near null space is attached to global mat */
1808     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1809     if (nearnullsp) {
1810       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1811       /* remove any stored info */
1812       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1813       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1814       /* store information for BDDC solver reuse */
1815       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1816       pcbddc->onearnullspace = nearnullsp;
1817       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1818       for (i=0;i<nnsp_size;i++) {
1819         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1820       }
1821     } else { /* if near null space is not provided BDDC uses constants by default */
1822       nnsp_size = 0;
1823       nnsp_has_cnst = PETSC_TRUE;
1824     }
1825     /* get max number of constraints on a single cc */
1826     max_constraints = nnsp_size;
1827     if (nnsp_has_cnst) max_constraints++;
1828 
1829     /*
1830          Evaluate maximum storage size needed by the procedure
1831          - temp_indices will contain start index of each constraint stored as follows
1832          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1833          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1834          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1835                                                                                                                                                            */
1836     total_counts = n_ISForFaces+n_ISForEdges;
1837     total_counts *= max_constraints;
1838     n_vertices = 0;
1839     if (ISForVertices) {
1840       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1841     }
1842     total_counts += n_vertices;
1843     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1844     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1845     total_counts = 0;
1846     max_size_of_constraint = 0;
1847     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1848       if (i<n_ISForEdges) {
1849         used_IS = &ISForEdges[i];
1850       } else {
1851         used_IS = &ISForFaces[i-n_ISForEdges];
1852       }
1853       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1854       total_counts += j;
1855       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1856     }
1857     total_counts *= max_constraints;
1858     total_counts += n_vertices;
1859     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1860     /* get local part of global near null space vectors */
1861     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1862     for (k=0;k<nnsp_size;k++) {
1863       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1864       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1865       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1866     }
1867 
1868     /* whether or not to skip lapack calls */
1869     skip_lapack = PETSC_TRUE;
1870     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1871 
1872     /* allocate some auxiliary stuff */
1873     if (!skip_lapack || pcbddc->use_qr_single) {
1874       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1875     }
1876 
1877     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1878     if (!skip_lapack) {
1879       PetscScalar temp_work;
1880 
1881 #if defined(PETSC_MISSING_LAPACK_GESVD)
1882       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1883       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1884       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1885       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1886 #if defined(PETSC_USE_COMPLEX)
1887       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1888 #endif
1889       /* now we evaluate the optimal workspace using query with lwork=-1 */
1890       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1891       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1892       lwork = -1;
1893       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1894 #if !defined(PETSC_USE_COMPLEX)
1895       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1896 #else
1897       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1898 #endif
1899       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1900       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1901 #else /* on missing GESVD */
1902       /* SVD */
1903       PetscInt max_n,min_n;
1904       max_n = max_size_of_constraint;
1905       min_n = max_constraints;
1906       if (max_size_of_constraint < max_constraints) {
1907         min_n = max_size_of_constraint;
1908         max_n = max_constraints;
1909       }
1910       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1911 #if defined(PETSC_USE_COMPLEX)
1912       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1913 #endif
1914       /* now we evaluate the optimal workspace using query with lwork=-1 */
1915       lwork = -1;
1916       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1917       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1918       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1919       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1920 #if !defined(PETSC_USE_COMPLEX)
1921       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1922 #else
1923       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1924 #endif
1925       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1926       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1927 #endif /* on missing GESVD */
1928       /* Allocate optimal workspace */
1929       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1930       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1931     }
1932     /* Now we can loop on constraining sets */
1933     total_counts = 0;
1934     temp_indices[0] = 0;
1935     /* vertices */
1936     if (ISForVertices) {
1937       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1938       if (nnsp_has_cnst) { /* consider all vertices */
1939         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1940         for (i=0;i<n_vertices;i++) {
1941           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1942           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1943           total_counts++;
1944         }
1945       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1946         PetscBool used_vertex;
1947         for (i=0;i<n_vertices;i++) {
1948           used_vertex = PETSC_FALSE;
1949           k = 0;
1950           while (!used_vertex && k<nnsp_size) {
1951             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1952             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1953               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1954               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1955               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1956               total_counts++;
1957               used_vertex = PETSC_TRUE;
1958             }
1959             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1960             k++;
1961           }
1962         }
1963       }
1964       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1965       n_vertices = total_counts;
1966     }
1967 
1968     /* edges and faces */
1969     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1970       if (ncc<n_ISForEdges) {
1971         used_IS = &ISForEdges[ncc];
1972         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1973       } else {
1974         used_IS = &ISForFaces[ncc-n_ISForEdges];
1975         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1976       }
1977       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1978       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1979       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1980       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1981       /* change of basis should not be performed on local periodic nodes */
1982       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1983       if (nnsp_has_cnst) {
1984         PetscScalar quad_value;
1985         temp_constraints++;
1986         if (!pcbddc->use_nnsp_true) {
1987           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1988         } else {
1989           quad_value = 1.0;
1990         }
1991         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1992         for (j=0;j<size_of_constraint;j++) {
1993           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1994         }
1995         /* sort by global ordering if using lapack subroutines (not needed!) */
1996         if (!skip_lapack || pcbddc->use_qr_single) {
1997           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1998           for (j=0;j<size_of_constraint;j++) {
1999             permutation[j]=j;
2000           }
2001           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2002           for (j=0;j<size_of_constraint;j++) {
2003             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2004           }
2005           for (j=0;j<size_of_constraint;j++) {
2006             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2007             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2008           }
2009           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2010           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2011         }
2012         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2013         total_counts++;
2014       }
2015       for (k=0;k<nnsp_size;k++) {
2016         PetscReal real_value;
2017         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2018         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2019         for (j=0;j<size_of_constraint;j++) {
2020           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2021         }
2022         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2023         /* check if array is null on the connected component */
2024         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2025         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2026         if (real_value > 0.0) { /* keep indices and values */
2027           /* sort by global ordering if using lapack subroutines */
2028           if (!skip_lapack || pcbddc->use_qr_single) {
2029             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2030             for (j=0;j<size_of_constraint;j++) {
2031               permutation[j]=j;
2032             }
2033             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2034             for (j=0;j<size_of_constraint;j++) {
2035               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2036               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2037             }
2038             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2039             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2040           }
2041           temp_constraints++;
2042           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2043           total_counts++;
2044         }
2045       }
2046       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2047       valid_constraints = temp_constraints;
2048       if (!pcbddc->use_nnsp_true && temp_constraints) {
2049         if (temp_constraints == 1) { /* just normalize the constraint */
2050           PetscScalar norm;
2051           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2052           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2053           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2054           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2055         } else { /* perform SVD */
2056           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2057 
2058 #if defined(PETSC_MISSING_LAPACK_GESVD)
2059           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2060              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2061              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2062                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2063                 from that computed using LAPACKgesvd
2064              -> This is due to a different computation of eigenvectors in LAPACKheev
2065              -> The quality of the POD-computed basis will be the same */
2066           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2067           /* Store upper triangular part of correlation matrix */
2068           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2069           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2070           for (j=0;j<temp_constraints;j++) {
2071             for (k=0;k<j+1;k++) {
2072               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
2073             }
2074           }
2075           /* compute eigenvalues and eigenvectors of correlation matrix */
2076           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2077           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2078 #if !defined(PETSC_USE_COMPLEX)
2079           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2080 #else
2081           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2082 #endif
2083           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2084           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2085           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2086           j = 0;
2087           while (j < temp_constraints && singular_vals[j] < tol) j++;
2088           total_counts = total_counts-j;
2089           valid_constraints = temp_constraints-j;
2090           /* scale and copy POD basis into used quadrature memory */
2091           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2092           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2093           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2094           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2095           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2096           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2097           if (j<temp_constraints) {
2098             PetscInt ii;
2099             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2100             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2101             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2102             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2103             for (k=0;k<temp_constraints-j;k++) {
2104               for (ii=0;ii<size_of_constraint;ii++) {
2105                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2106               }
2107             }
2108           }
2109 #else  /* on missing GESVD */
2110           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2111           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2112           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2113           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2114 #if !defined(PETSC_USE_COMPLEX)
2115           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
2116 #else
2117           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
2118 #endif
2119           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2120           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2121           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2122           k = temp_constraints;
2123           if (k > size_of_constraint) k = size_of_constraint;
2124           j = 0;
2125           while (j < k && singular_vals[k-j-1] < tol) j++;
2126           valid_constraints = k-j;
2127           total_counts = total_counts-temp_constraints+valid_constraints;
2128 #endif /* on missing GESVD */
2129         }
2130       }
2131       /* setting change_of_basis flag is safe now */
2132       if (boolforchange) {
2133         for (j=0;j<valid_constraints;j++) {
2134           PetscBTSet(change_basis,total_counts-j-1);
2135         }
2136       }
2137     }
2138     /* free workspace */
2139     if (!skip_lapack || pcbddc->use_qr_single) {
2140       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2141     }
2142     if (!skip_lapack) {
2143       ierr = PetscFree(work);CHKERRQ(ierr);
2144 #if defined(PETSC_USE_COMPLEX)
2145       ierr = PetscFree(rwork);CHKERRQ(ierr);
2146 #endif
2147       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2148 #if defined(PETSC_MISSING_LAPACK_GESVD)
2149       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2150       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2151 #endif
2152     }
2153     for (k=0;k<nnsp_size;k++) {
2154       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2155     }
2156     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2157   } else {
2158     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2159     PetscInt        cum = 0;
2160 
2161     total_counts = 0;
2162     n_vertices = 0;
2163     if (sub_schurs->is_Ej_com) {
2164       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2165     }
2166     max_constraints = 0;
2167     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2168       total_counts += pcbddc->adaptive_constraints_n[i];
2169       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2170     }
2171     temp_indices = pcbddc->adaptive_constraints_ptrs;
2172     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2173     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2174 
2175 #if 0
2176     printf("Found %d totals\n",total_counts);
2177     for (i=0;i<total_counts;i++) {
2178       printf("const %d, start %d",i,temp_indices[i]);
2179       printf(" end %d:\n",temp_indices[i+1]);
2180       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2181         printf("  idxs %d",temp_indices_to_constraint[j]);
2182         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2183       }
2184     }
2185     for (i=0;i<n_vertices;i++) {
2186       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2187     }
2188     for (i=0;i<sub_schurs->n_subs;i++) {
2189       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2190     }
2191 #endif
2192 
2193     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2194     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2195     /* Change of basis */
2196     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2197     if (pcbddc->use_change_of_basis) {
2198       cum = n_vertices;
2199       for (i=0;i<sub_schurs->n_subs;i++) {
2200         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2201           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2202             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2203           }
2204         }
2205         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2206       }
2207     }
2208   }
2209 
2210   /* free index sets of faces, edges and vertices */
2211   for (i=0;i<n_ISForFaces;i++) {
2212     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2213   }
2214   if (n_ISForFaces) {
2215     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2216   }
2217   for (i=0;i<n_ISForEdges;i++) {
2218     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2219   }
2220   if (n_ISForEdges) {
2221     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2222   }
2223   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2224 
2225   /* map temp_indices_to_constraint in boundary numbering */
2226   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2227   if (i != temp_indices[total_counts]) {
2228     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2229   }
2230 
2231   /* set quantities in pcbddc data structure and store previous primal size */
2232   /* n_vertices defines the number of subdomain corners in the primal space */
2233   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2234   olocal_primal_size = pcbddc->local_primal_size;
2235   pcbddc->local_primal_size = total_counts;
2236   pcbddc->n_vertices = n_vertices;
2237   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2238 
2239   /* Create constraint matrix */
2240   /* The constraint matrix is used to compute the l2g map of primal dofs */
2241   /* so we need to set it up properly either with or without change of basis */
2242   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2243   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2244   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2245   /* array to compute a local numbering of constraints : vertices first then constraints */
2246   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2247   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2248   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2249   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2250   /* auxiliary stuff for basis change */
2251   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2252   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2253 
2254   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2255   total_primal_vertices=0;
2256   for (i=0;i<pcbddc->local_primal_size;i++) {
2257     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2258     if (size_of_constraint == 1) {
2259       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2260       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2261       aux_primal_minloc[total_primal_vertices]=0;
2262       total_primal_vertices++;
2263     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2264       PetscInt min_loc,min_index;
2265       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2266       /* find first untouched local node */
2267       k = 0;
2268       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2269       min_index = global_indices[k];
2270       min_loc = k;
2271       /* search the minimum among global nodes already untouched on the cc */
2272       for (k=1;k<size_of_constraint;k++) {
2273         /* there can be more than one constraint on a single connected component */
2274         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2275           min_index = global_indices[k];
2276           min_loc = k;
2277         }
2278       }
2279       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2280       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2281       aux_primal_minloc[total_primal_vertices]=min_loc;
2282       total_primal_vertices++;
2283     }
2284   }
2285   /* determine if a QR strategy is needed for change of basis */
2286   qr_needed = PETSC_FALSE;
2287   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2288   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2289     if (PetscBTLookup(change_basis,i)) {
2290       if (!pcbddc->use_qr_single) {
2291         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2292         j = 0;
2293         for (k=0;k<size_of_constraint;k++) {
2294           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2295             j++;
2296           }
2297         }
2298         /* found more than one primal dof on the cc */
2299         if (j > 1) {
2300           PetscBTSet(qr_needed_idx,i);
2301           qr_needed = PETSC_TRUE;
2302         }
2303       } else {
2304         PetscBTSet(qr_needed_idx,i);
2305         qr_needed = PETSC_TRUE;
2306       }
2307     }
2308   }
2309   /* free workspace */
2310   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2311 
2312   /* permute indices in order to have a sorted set of vertices */
2313   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2314 
2315   /* nonzero structure of constraint matrix */
2316   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2317   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2318   j=total_primal_vertices;
2319   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2320     if (!PetscBTLookup(change_basis,i)) {
2321       nnz[j]=temp_indices[i+1]-temp_indices[i];
2322       j++;
2323     }
2324   }
2325   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2326   ierr = PetscFree(nnz);CHKERRQ(ierr);
2327   /* set values in constraint matrix */
2328   for (i=0;i<total_primal_vertices;i++) {
2329     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2330   }
2331   total_counts = total_primal_vertices;
2332   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2333     if (!PetscBTLookup(change_basis,i)) {
2334       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2335       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2336       total_counts++;
2337     }
2338   }
2339   /* assembling */
2340   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2342   /*
2343   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2344   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2345   */
2346   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2347   if (pcbddc->use_change_of_basis) {
2348     /* dual and primal dofs on a single cc */
2349     PetscInt     dual_dofs,primal_dofs;
2350     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2351     PetscInt     primal_counter;
2352     /* working stuff for GEQRF */
2353     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2354     PetscBLASInt lqr_work;
2355     /* working stuff for UNGQR */
2356     PetscScalar  *gqr_work,lgqr_work_t;
2357     PetscBLASInt lgqr_work;
2358     /* working stuff for TRTRS */
2359     PetscScalar  *trs_rhs;
2360     PetscBLASInt Blas_NRHS;
2361     /* pointers for values insertion into change of basis matrix */
2362     PetscInt     *start_rows,*start_cols;
2363     PetscScalar  *start_vals;
2364     /* working stuff for values insertion */
2365     PetscBT      is_primal;
2366     /* matrix sizes */
2367     PetscInt     global_size,local_size;
2368     /* temporary change of basis */
2369     Mat          localChangeOfBasisMatrix;
2370 
2371     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2372     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2373     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2374     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2375     /* nonzeros for local mat */
2376     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2377     for (i=0;i<pcis->n;i++) nnz[i]=1;
2378     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2379       if (PetscBTLookup(change_basis,i)) {
2380         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2381         if (PetscBTLookup(qr_needed_idx,i)) {
2382           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2383         } else {
2384           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2385           /* get local primal index on the cc */
2386           j = 0;
2387           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2388           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2389         }
2390       }
2391     }
2392     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2393     ierr = PetscFree(nnz);CHKERRQ(ierr);
2394     /* Set initial identity in the matrix */
2395     for (i=0;i<pcis->n;i++) {
2396       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2397     }
2398 
2399     if (pcbddc->dbg_flag) {
2400       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2401       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2402     }
2403 
2404 
2405     /* Now we loop on the constraints which need a change of basis */
2406     /*
2407        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2408        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2409 
2410        Basic blocks of change of basis matrix T computed by
2411 
2412           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2413 
2414             | 1        0   ...        0         s_1/S |
2415             | 0        1   ...        0         s_2/S |
2416             |              ...                        |
2417             | 0        ...            1     s_{n-1}/S |
2418             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2419 
2420             with S = \sum_{i=1}^n s_i^2
2421             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2422                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2423 
2424           - QR decomposition of constraints otherwise
2425     */
2426     if (qr_needed) {
2427       /* space to store Q */
2428       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2429       /* first we issue queries for optimal work */
2430       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2431       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2432       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2433       lqr_work = -1;
2434       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2435       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2436       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2437       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2438       lgqr_work = -1;
2439       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2440       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2441       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2442       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2443       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2444       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2445       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2446       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2447       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2448       /* array to store scaling factors for reflectors */
2449       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2450       /* array to store rhs and solution of triangular solver */
2451       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2452       /* allocating workspace for check */
2453       if (pcbddc->dbg_flag) {
2454         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2455       }
2456     }
2457     /* array to store whether a node is primal or not */
2458     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2459     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2460     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2461     if (i != total_primal_vertices) {
2462       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2463     }
2464     for (i=0;i<total_primal_vertices;i++) {
2465       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2466     }
2467     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2468 
2469     /* loop on constraints and see whether or not they need a change of basis and compute it */
2470     /* -> using implicit ordering contained in temp_indices data */
2471     total_counts = pcbddc->n_vertices;
2472     primal_counter = total_counts;
2473     while (total_counts<pcbddc->local_primal_size) {
2474       primal_dofs = 1;
2475       if (PetscBTLookup(change_basis,total_counts)) {
2476         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2477         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2478           primal_dofs++;
2479         }
2480         /* get constraint info */
2481         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2482         dual_dofs = size_of_constraint-primal_dofs;
2483 
2484         if (pcbddc->dbg_flag) {
2485           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2486         }
2487 
2488         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2489 
2490           /* copy quadrature constraints for change of basis check */
2491           if (pcbddc->dbg_flag) {
2492             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2493           }
2494           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2495           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2496 
2497           /* compute QR decomposition of constraints */
2498           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2499           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2500           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2501           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2502           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2503           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2504           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2505 
2506           /* explictly compute R^-T */
2507           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2508           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2509           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2510           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2511           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2512           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2513           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2514           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2515           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2516           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2517 
2518           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2519           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2520           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2521           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2522           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2523           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2524           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2525           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2526           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2527 
2528           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2529              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2530              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2531           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2532           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2533           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2534           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2535           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2536           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2537           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2538           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2539           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2540           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2541 
2542           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2543           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2544           /* insert cols for primal dofs */
2545           for (j=0;j<primal_dofs;j++) {
2546             start_vals = &qr_basis[j*size_of_constraint];
2547             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2548             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2549           }
2550           /* insert cols for dual dofs */
2551           for (j=0,k=0;j<dual_dofs;k++) {
2552             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2553               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2554               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2555               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2556               j++;
2557             }
2558           }
2559 
2560           /* check change of basis */
2561           if (pcbddc->dbg_flag) {
2562             PetscInt   ii,jj;
2563             PetscBool valid_qr=PETSC_TRUE;
2564             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2565             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2566             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2567             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2568             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2569             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2570             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2571             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2572             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2573             for (jj=0;jj<size_of_constraint;jj++) {
2574               for (ii=0;ii<primal_dofs;ii++) {
2575                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2576                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2577               }
2578             }
2579             if (!valid_qr) {
2580               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2581               for (jj=0;jj<size_of_constraint;jj++) {
2582                 for (ii=0;ii<primal_dofs;ii++) {
2583                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2584                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2585                   }
2586                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2587                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2588                   }
2589                 }
2590               }
2591             } else {
2592               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2593             }
2594           }
2595         } else { /* simple transformation block */
2596           PetscInt    row,col;
2597           PetscScalar val,norm;
2598 
2599           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2600           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2601           for (j=0;j<size_of_constraint;j++) {
2602             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2603             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2604             if (!PetscBTLookup(is_primal,row_B)) {
2605               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2606               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2607               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2608             } else {
2609               for (k=0;k<size_of_constraint;k++) {
2610                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2611                 if (row != col) {
2612                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2613                 } else {
2614                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2615                 }
2616                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2617               }
2618             }
2619           }
2620           if (pcbddc->dbg_flag) {
2621             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2622           }
2623         }
2624         /* increment primal counter */
2625         primal_counter += primal_dofs;
2626       } else {
2627         if (pcbddc->dbg_flag) {
2628           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2629         }
2630       }
2631       /* increment constraint counter total_counts */
2632       total_counts += primal_dofs;
2633     }
2634 
2635     /* free workspace */
2636     if (qr_needed) {
2637       if (pcbddc->dbg_flag) {
2638         ierr = PetscFree(work);CHKERRQ(ierr);
2639       }
2640       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2641       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2642       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2643       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2644       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2645     }
2646     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2647     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2648     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2649 
2650     /* assembling of global change of variable */
2651     {
2652       Mat      tmat;
2653       PetscInt bs;
2654 
2655       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2656       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2657       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2658       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2659       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2660       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2661       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2662       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2663       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2664       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2665       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2666       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2667       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2668       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2669       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2670       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2671       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2672       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2673     }
2674     /* check */
2675     if (pcbddc->dbg_flag) {
2676       PetscReal error;
2677       Vec       x,x_change;
2678 
2679       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2680       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2681       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2682       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2683       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2684       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2685       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2686       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2687       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2688       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2689       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2690       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2691       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2692       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2693       ierr = VecDestroy(&x);CHKERRQ(ierr);
2694       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2695     }
2696 
2697     /* adapt sub_schurs computed (if any) */
2698     if (pcbddc->use_deluxe_scaling) {
2699       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2700       if (sub_schurs->n_subs_par_g) {
2701         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2702       }
2703       if (sub_schurs->S_Ej_all) {
2704         Mat S_1,S_2,tmat;
2705         IS is_all_N;
2706 
2707         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2708         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2709         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2710         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2711         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2712         sub_schurs->S_Ej_all = S_1;
2713         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2714         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2715         sub_schurs->sum_S_Ej_all = S_2;
2716         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2717       }
2718     }
2719     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2720   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2721     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2722     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2723   }
2724 
2725   /* set up change of basis context */
2726   if (pcbddc->ChangeOfBasisMatrix) {
2727     PCBDDCChange_ctx change_ctx;
2728 
2729     if (!pcbddc->new_global_mat) {
2730       PetscInt global_size,local_size;
2731 
2732       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2733       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2734       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2735       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2736       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2737       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2738       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2739       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2740       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2741     } else {
2742       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2743       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2744       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2745     }
2746     if (!pcbddc->user_ChangeOfBasisMatrix) {
2747       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2748       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2749     } else {
2750       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2751       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2752     }
2753     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2754     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2755     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2756     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2757   }
2758 
2759   /* get indices in local ordering for vertices and constraints */
2760   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2761     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2762     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2763   }
2764   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2765   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2766   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2767   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2768   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2769   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2770   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2771   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2772   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2773   /* set quantities in PCBDDC data struct */
2774   pcbddc->n_actual_vertices = i;
2775   /* check if a new primal space has been introduced */
2776   pcbddc->new_primal_space_local = PETSC_TRUE;
2777   if (olocal_primal_size == pcbddc->local_primal_size) {
2778     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2779     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2780     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2781   }
2782   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2783   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2784 
2785   /* flush dbg viewer */
2786   if (pcbddc->dbg_flag) {
2787     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2788   }
2789 
2790   /* free workspace */
2791   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2792   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2793   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2794   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2795   if (!pcbddc->adaptive_selection) {
2796     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2797     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2798   } else {
2799     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2800                       pcbddc->adaptive_constraints_ptrs,
2801                       pcbddc->adaptive_constraints_idxs,
2802                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2803     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2804   }
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 #undef __FUNCT__
2809 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2810 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2811 {
2812   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2813   PC_IS       *pcis = (PC_IS*)pc->data;
2814   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2815   PetscInt    ierr,i,vertex_size;
2816   PetscViewer viewer=pcbddc->dbg_viewer;
2817 
2818   PetscFunctionBegin;
2819   /* Reset previously computed graph */
2820   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2821   /* Init local Graph struct */
2822   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2823 
2824   /* Check validity of the csr graph passed in by the user */
2825   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2826     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2827   }
2828 
2829   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2830   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2831     PetscInt *xadj,*adjncy;
2832     PetscInt nvtxs;
2833 
2834     if (pcbddc->use_local_adj) {
2835       PetscBool flg_row=PETSC_FALSE;
2836 
2837       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2838       if (flg_row) {
2839         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2840         pcbddc->computed_rowadj = PETSC_TRUE;
2841       }
2842       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2843     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2844       IS                     is_dummy;
2845       ISLocalToGlobalMapping l2gmap_dummy;
2846       PetscInt               j,sum;
2847       PetscInt               *cxadj,*cadjncy;
2848       const PetscInt         *idxs;
2849       PCBDDCGraph            graph;
2850       PetscBT                is_on_boundary;
2851 
2852       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2853       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2854       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2855       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2856       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2857       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2858       graph->xadj = xadj;
2859       graph->adjncy = adjncy;
2860       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2861       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2862 
2863       if (pcbddc->dbg_flag) {
2864         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2865         for (i=0;i<graph->ncc;i++) {
2866           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2867         }
2868         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2869       }
2870 
2871       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2872       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2873       for (i=0;i<pcis->n_B;i++) {
2874         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2875       }
2876       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2877 
2878       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2879       sum = 0;
2880       for (i=0;i<graph->ncc;i++) {
2881         PetscInt sizecc = 0;
2882         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2883           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2884             sizecc++;
2885           }
2886         }
2887         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2888           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2889             cxadj[graph->queue[j]] = sizecc;
2890           }
2891         }
2892         sum += sizecc*sizecc;
2893       }
2894       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2895       sum = 0;
2896       for (i=0;i<nvtxs;i++) {
2897         PetscInt temp = cxadj[i];
2898         cxadj[i] = sum;
2899         sum += temp;
2900       }
2901       cxadj[nvtxs] = sum;
2902       for (i=0;i<graph->ncc;i++) {
2903         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2904           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2905             PetscInt k,sizecc = 0;
2906             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2907               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2908                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2909                 sizecc++;
2910               }
2911             }
2912           }
2913         }
2914       }
2915       if (nvtxs) {
2916         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2917       } else {
2918         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2919         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2920       }
2921       graph->xadj = 0;
2922       graph->adjncy = 0;
2923       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2924       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2925     }
2926   }
2927 
2928   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2929   vertex_size = 1;
2930   if (pcbddc->user_provided_isfordofs) {
2931     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2932       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2933       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2934         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2935         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2936       }
2937       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2938       pcbddc->n_ISForDofs = 0;
2939       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2940     }
2941     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2942     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2943   } else {
2944     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2945       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2946       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2947       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2948         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2949       }
2950     }
2951   }
2952 
2953   /* Setup of Graph */
2954   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2955     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2956   }
2957   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2958     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2959   }
2960   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2961 
2962   /* Graph's connected components analysis */
2963   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2964 
2965   /* print some info to stdout */
2966   if (pcbddc->dbg_flag) {
2967     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2968   }
2969 
2970   /* mark topography has done */
2971   pcbddc->recompute_topography = PETSC_FALSE;
2972   PetscFunctionReturn(0);
2973 }
2974 
2975 #undef __FUNCT__
2976 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2977 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2978 {
2979   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2980   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2981   PetscErrorCode ierr;
2982 
2983   PetscFunctionBegin;
2984   n = 0;
2985   vertices = 0;
2986   if (pcbddc->ConstraintMatrix) {
2987     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2988     for (i=0;i<local_primal_size;i++) {
2989       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2990       if (size_of_constraint == 1) n++;
2991       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2992     }
2993     if (vertices_idx) {
2994       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2995       n = 0;
2996       for (i=0;i<local_primal_size;i++) {
2997         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2998         if (size_of_constraint == 1) {
2999           vertices[n++]=row_cmat_indices[0];
3000         }
3001         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3002       }
3003     }
3004   }
3005   *n_vertices = n;
3006   if (vertices_idx) *vertices_idx = vertices;
3007   PetscFunctionReturn(0);
3008 }
3009 
3010 #undef __FUNCT__
3011 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3012 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3013 {
3014   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3015   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3016   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3017   PetscBT        touched;
3018   PetscErrorCode ierr;
3019 
3020     /* This function assumes that the number of local constraints per connected component
3021        is not greater than the number of nodes defined for the connected component
3022        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3023   PetscFunctionBegin;
3024   n = 0;
3025   constraints_index = 0;
3026   if (pcbddc->ConstraintMatrix) {
3027     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3028     max_size_of_constraint = 0;
3029     for (i=0;i<local_primal_size;i++) {
3030       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3031       if (size_of_constraint > 1) {
3032         n++;
3033       }
3034       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3035       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3036     }
3037     if (constraints_idx) {
3038       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3039       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3040       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3041       n = 0;
3042       for (i=0;i<local_primal_size;i++) {
3043         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3044         if (size_of_constraint > 1) {
3045           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3046           /* find first untouched local node */
3047           j = 0;
3048           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3049           min_index = row_cmat_global_indices[j];
3050           min_loc = j;
3051           /* search the minimum among nodes not yet touched on the connected component
3052              since there can be more than one constraint on a single cc */
3053           for (j=1;j<size_of_constraint;j++) {
3054             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3055               min_index = row_cmat_global_indices[j];
3056               min_loc = j;
3057             }
3058           }
3059           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3060           constraints_index[n++] = row_cmat_indices[min_loc];
3061         }
3062         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3063       }
3064       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3065       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3066     }
3067   }
3068   *n_constraints = n;
3069   if (constraints_idx) *constraints_idx = constraints_index;
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 #undef __FUNCT__
3074 #define __FUNCT__ "PCBDDCSubsetNumbering"
3075 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3076 {
3077   Vec            local_vec,global_vec;
3078   IS             seqis,paris;
3079   VecScatter     scatter_ctx;
3080   PetscScalar    *array;
3081   PetscInt       *temp_global_dofs;
3082   PetscScalar    globalsum;
3083   PetscInt       i,j,s;
3084   PetscInt       nlocals,first_index,old_index,max_local;
3085   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3086   PetscMPIInt    *dof_sizes,*dof_displs;
3087   PetscBool      first_found;
3088   PetscErrorCode ierr;
3089 
3090   PetscFunctionBegin;
3091   /* mpi buffers */
3092   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3093   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3094   j = ( !rank_prec_comm ? size_prec_comm : 0);
3095   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3096   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3097   /* get maximum size of subset */
3098   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3099   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3100   max_local = 0;
3101   for (i=0;i<n_local_dofs;i++) {
3102     if (max_local < temp_global_dofs[i] ) {
3103       max_local = temp_global_dofs[i];
3104     }
3105   }
3106   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3107   max_global++;
3108   max_local = 0;
3109   for (i=0;i<n_local_dofs;i++) {
3110     if (max_local < local_dofs[i] ) {
3111       max_local = local_dofs[i];
3112     }
3113   }
3114   max_local++;
3115   /* allocate workspace */
3116   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3117   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3118   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3119   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3120   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3121   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3122   /* create scatter */
3123   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3124   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3125   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3126   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3127   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3128   /* init array */
3129   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3130   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3131   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3132   if (local_dofs_mult) {
3133     for (i=0;i<n_local_dofs;i++) {
3134       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3135     }
3136   } else {
3137     for (i=0;i<n_local_dofs;i++) {
3138       array[local_dofs[i]]=1.0;
3139     }
3140   }
3141   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3142   /* scatter into global vec and get total number of global dofs */
3143   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3144   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3145   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3146   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3147   /* Fill global_vec with cumulative function for global numbering */
3148   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3149   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3150   nlocals = 0;
3151   first_index = -1;
3152   first_found = PETSC_FALSE;
3153   for (i=0;i<s;i++) {
3154     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3155       first_found = PETSC_TRUE;
3156       first_index = i;
3157     }
3158     nlocals += (PetscInt)PetscRealPart(array[i]);
3159   }
3160   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3161   if (!rank_prec_comm) {
3162     dof_displs[0]=0;
3163     for (i=1;i<size_prec_comm;i++) {
3164       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3165     }
3166   }
3167   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3168   if (first_found) {
3169     array[first_index] += (PetscScalar)nlocals;
3170     old_index = first_index;
3171     for (i=first_index+1;i<s;i++) {
3172       if (PetscRealPart(array[i]) > 0.1) {
3173         array[i] += array[old_index];
3174         old_index = i;
3175       }
3176     }
3177   }
3178   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3179   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3180   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3181   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3182   /* get global ordering of local dofs */
3183   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3184   if (local_dofs_mult) {
3185     for (i=0;i<n_local_dofs;i++) {
3186       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3187     }
3188   } else {
3189     for (i=0;i<n_local_dofs;i++) {
3190       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3191     }
3192   }
3193   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3194   /* free workspace */
3195   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3196   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3197   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3198   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3199   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3200   /* return pointer to global ordering of local dofs */
3201   *global_numbering_subset = temp_global_dofs;
3202   PetscFunctionReturn(0);
3203 }
3204 
3205 #undef __FUNCT__
3206 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3207 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3208 {
3209   PetscInt       i,j;
3210   PetscScalar    *alphas;
3211   PetscErrorCode ierr;
3212 
3213   PetscFunctionBegin;
3214   /* this implements stabilized Gram-Schmidt */
3215   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3216   for (i=0;i<n;i++) {
3217     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3218     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3219     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3220   }
3221   ierr = PetscFree(alphas);CHKERRQ(ierr);
3222   PetscFunctionReturn(0);
3223 }
3224 
3225 #undef __FUNCT__
3226 #define __FUNCT__ "MatISGetSubassemblingPattern"
3227 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3228 {
3229   Mat             subdomain_adj;
3230   IS              new_ranks,ranks_send_to;
3231   MatPartitioning partitioner;
3232   Mat_IS          *matis;
3233   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3234   PetscInt        prank;
3235   PetscMPIInt     size,rank,color;
3236   PetscInt        *xadj,*adjncy,*oldranks;
3237   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3238   PetscInt        i,local_size,threshold=0;
3239   PetscErrorCode  ierr;
3240   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3241   PetscSubcomm    subcomm;
3242 
3243   PetscFunctionBegin;
3244   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3245   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3246   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3247 
3248   /* Get info on mapping */
3249   matis = (Mat_IS*)(mat->data);
3250   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3251   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3252 
3253   /* build local CSR graph of subdomains' connectivity */
3254   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3255   xadj[0] = 0;
3256   xadj[1] = PetscMax(n_neighs-1,0);
3257   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3258   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3259 
3260   if (threshold) {
3261     PetscInt xadj_count = 0;
3262     for (i=1;i<n_neighs;i++) {
3263       if (n_shared[i] > threshold) {
3264         adjncy[xadj_count] = neighs[i];
3265         adjncy_wgt[xadj_count] = n_shared[i];
3266         xadj_count++;
3267       }
3268     }
3269     xadj[1] = xadj_count;
3270   } else {
3271     if (xadj[1]) {
3272       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3273       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3274     }
3275   }
3276   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3277   if (use_square) {
3278     for (i=0;i<xadj[1];i++) {
3279       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3280     }
3281   }
3282   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3283 
3284   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3285 
3286   /*
3287     Restrict work on active processes only.
3288   */
3289   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3290   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3291   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3292   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3293   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3294   if (color) {
3295     ierr = PetscFree(xadj);CHKERRQ(ierr);
3296     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3297     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3298   } else {
3299     PetscInt coarsening_ratio;
3300     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3301     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3302     prank = rank;
3303     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3304     /*
3305     for (i=0;i<size;i++) {
3306       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3307     }
3308     */
3309     for (i=0;i<xadj[1];i++) {
3310       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3311     }
3312     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3313     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3314     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3315 
3316     /* Partition */
3317     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3318     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3319     if (use_vwgt) {
3320       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3321       v_wgt[0] = local_size;
3322       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3323     }
3324     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3325     coarsening_ratio = size/n_subdomains;
3326     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3327     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3328     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3329     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3330 
3331     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3332     if (contiguous) {
3333       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3334     } else {
3335       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3336     }
3337     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3338     /* clean up */
3339     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3340     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3341     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3342     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3343   }
3344   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3345 
3346   /* assemble parallel IS for sends */
3347   i = 1;
3348   if (color) i=0;
3349   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3350 
3351   /* get back IS */
3352   *is_sends = ranks_send_to;
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3357 
3358 #undef __FUNCT__
3359 #define __FUNCT__ "MatISSubassemble"
3360 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3361 {
3362   Mat                    local_mat;
3363   Mat_IS                 *matis;
3364   IS                     is_sends_internal;
3365   PetscInt               rows,cols,new_local_rows;
3366   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3367   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3368   ISLocalToGlobalMapping l2gmap;
3369   PetscInt*              l2gmap_indices;
3370   const PetscInt*        is_indices;
3371   MatType                new_local_type;
3372   /* buffers */
3373   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3374   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3375   PetscInt               *recv_buffer_idxs_local;
3376   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3377   /* MPI */
3378   MPI_Comm               comm,comm_n;
3379   PetscSubcomm           subcomm;
3380   PetscMPIInt            n_sends,n_recvs,commsize;
3381   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3382   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3383   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3384   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3385   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3386   PetscErrorCode         ierr;
3387 
3388   PetscFunctionBegin;
3389   /* TODO: add missing checks */
3390   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3391   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3392   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3393   PetscValidLogicalCollectiveInt(mat,nis,7);
3394   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3395   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3396   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3397   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3398   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3399   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3400   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3401   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3402     PetscInt mrows,mcols,mnrows,mncols;
3403     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3404     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3405     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3406     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3407     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3408     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3409   }
3410   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3411   PetscValidLogicalCollectiveInt(mat,bs,0);
3412   /* prepare IS for sending if not provided */
3413   if (!is_sends) {
3414     PetscBool pcontig = PETSC_TRUE;
3415     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3416     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3417   } else {
3418     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3419     is_sends_internal = is_sends;
3420   }
3421 
3422   /* get pointer of MATIS data */
3423   matis = (Mat_IS*)mat->data;
3424 
3425   /* get comm */
3426   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3427 
3428   /* compute number of sends */
3429   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3430   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3431 
3432   /* compute number of receives */
3433   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3434   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3435   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3436   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3437   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3438   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3439   ierr = PetscFree(iflags);CHKERRQ(ierr);
3440 
3441   /* restrict comm if requested */
3442   subcomm = 0;
3443   destroy_mat = PETSC_FALSE;
3444   if (restrict_comm) {
3445     PetscMPIInt color,subcommsize;
3446 
3447     color = 0;
3448     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3449     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3450     subcommsize = commsize - subcommsize;
3451     /* check if reuse has been requested */
3452     if (reuse == MAT_REUSE_MATRIX) {
3453       if (*mat_n) {
3454         PetscMPIInt subcommsize2;
3455         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3456         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3457         comm_n = PetscObjectComm((PetscObject)*mat_n);
3458       } else {
3459         comm_n = PETSC_COMM_SELF;
3460       }
3461     } else { /* MAT_INITIAL_MATRIX */
3462       PetscMPIInt rank;
3463 
3464       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3465       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3466       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3467       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3468       comm_n = subcomm->comm;
3469     }
3470     /* flag to destroy *mat_n if not significative */
3471     if (color) destroy_mat = PETSC_TRUE;
3472   } else {
3473     comm_n = comm;
3474   }
3475 
3476   /* prepare send/receive buffers */
3477   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3478   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3479   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3480   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3481   if (nis) {
3482     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3483   }
3484 
3485   /* Get data from local matrices */
3486   if (!isdense) {
3487     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3488     /* TODO: See below some guidelines on how to prepare the local buffers */
3489     /*
3490        send_buffer_vals should contain the raw values of the local matrix
3491        send_buffer_idxs should contain:
3492        - MatType_PRIVATE type
3493        - PetscInt        size_of_l2gmap
3494        - PetscInt        global_row_indices[size_of_l2gmap]
3495        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3496     */
3497   } else {
3498     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3499     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3500     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3501     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3502     send_buffer_idxs[1] = i;
3503     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3504     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3505     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3506     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3507     for (i=0;i<n_sends;i++) {
3508       ilengths_vals[is_indices[i]] = len*len;
3509       ilengths_idxs[is_indices[i]] = len+2;
3510     }
3511   }
3512   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3513   /* additional is (if any) */
3514   if (nis) {
3515     PetscMPIInt psum;
3516     PetscInt j;
3517     for (j=0,psum=0;j<nis;j++) {
3518       PetscInt plen;
3519       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3520       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3521       psum += len+1; /* indices + lenght */
3522     }
3523     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3524     for (j=0,psum=0;j<nis;j++) {
3525       PetscInt plen;
3526       const PetscInt *is_array_idxs;
3527       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3528       send_buffer_idxs_is[psum] = plen;
3529       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3530       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3531       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3532       psum += plen+1; /* indices + lenght */
3533     }
3534     for (i=0;i<n_sends;i++) {
3535       ilengths_idxs_is[is_indices[i]] = psum;
3536     }
3537     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3538   }
3539 
3540   buf_size_idxs = 0;
3541   buf_size_vals = 0;
3542   buf_size_idxs_is = 0;
3543   for (i=0;i<n_recvs;i++) {
3544     buf_size_idxs += (PetscInt)olengths_idxs[i];
3545     buf_size_vals += (PetscInt)olengths_vals[i];
3546     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3547   }
3548   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3549   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3550   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3551 
3552   /* get new tags for clean communications */
3553   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3554   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3555   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3556 
3557   /* allocate for requests */
3558   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3559   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3560   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3561   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3562   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3563   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3564 
3565   /* communications */
3566   ptr_idxs = recv_buffer_idxs;
3567   ptr_vals = recv_buffer_vals;
3568   ptr_idxs_is = recv_buffer_idxs_is;
3569   for (i=0;i<n_recvs;i++) {
3570     source_dest = onodes[i];
3571     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3572     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3573     ptr_idxs += olengths_idxs[i];
3574     ptr_vals += olengths_vals[i];
3575     if (nis) {
3576       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3577       ptr_idxs_is += olengths_idxs_is[i];
3578     }
3579   }
3580   for (i=0;i<n_sends;i++) {
3581     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3582     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3583     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3584     if (nis) {
3585       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3586     }
3587   }
3588   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3589   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3590 
3591   /* assemble new l2g map */
3592   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3593   ptr_idxs = recv_buffer_idxs;
3594   new_local_rows = 0;
3595   for (i=0;i<n_recvs;i++) {
3596     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3597     ptr_idxs += olengths_idxs[i];
3598   }
3599   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3600   ptr_idxs = recv_buffer_idxs;
3601   new_local_rows = 0;
3602   for (i=0;i<n_recvs;i++) {
3603     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3604     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3605     ptr_idxs += olengths_idxs[i];
3606   }
3607   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3608   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3609   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3610 
3611   /* infer new local matrix type from received local matrices type */
3612   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3613   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3614   if (n_recvs) {
3615     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3616     ptr_idxs = recv_buffer_idxs;
3617     for (i=0;i<n_recvs;i++) {
3618       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3619         new_local_type_private = MATAIJ_PRIVATE;
3620         break;
3621       }
3622       ptr_idxs += olengths_idxs[i];
3623     }
3624     switch (new_local_type_private) {
3625       case MATDENSE_PRIVATE:
3626         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3627           new_local_type = MATSEQAIJ;
3628           bs = 1;
3629         } else { /* if I receive only 1 dense matrix */
3630           new_local_type = MATSEQDENSE;
3631           bs = 1;
3632         }
3633         break;
3634       case MATAIJ_PRIVATE:
3635         new_local_type = MATSEQAIJ;
3636         bs = 1;
3637         break;
3638       case MATBAIJ_PRIVATE:
3639         new_local_type = MATSEQBAIJ;
3640         break;
3641       case MATSBAIJ_PRIVATE:
3642         new_local_type = MATSEQSBAIJ;
3643         break;
3644       default:
3645         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3646         break;
3647     }
3648   } else { /* by default, new_local_type is seqdense */
3649     new_local_type = MATSEQDENSE;
3650     bs = 1;
3651   }
3652 
3653   /* create MATIS object if needed */
3654   if (reuse == MAT_INITIAL_MATRIX) {
3655     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3656     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3657   } else {
3658     /* it also destroys the local matrices */
3659     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3660   }
3661   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3662   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3663 
3664   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3665 
3666   /* Global to local map of received indices */
3667   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3668   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3669   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3670 
3671   /* restore attributes -> type of incoming data and its size */
3672   buf_size_idxs = 0;
3673   for (i=0;i<n_recvs;i++) {
3674     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3675     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3676     buf_size_idxs += (PetscInt)olengths_idxs[i];
3677   }
3678   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3679 
3680   /* set preallocation */
3681   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3682   if (!newisdense) {
3683     PetscInt *new_local_nnz=0;
3684 
3685     ptr_vals = recv_buffer_vals;
3686     ptr_idxs = recv_buffer_idxs_local;
3687     if (n_recvs) {
3688       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3689     }
3690     for (i=0;i<n_recvs;i++) {
3691       PetscInt j;
3692       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3693         for (j=0;j<*(ptr_idxs+1);j++) {
3694           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3695         }
3696       } else {
3697         /* TODO */
3698       }
3699       ptr_idxs += olengths_idxs[i];
3700     }
3701     if (new_local_nnz) {
3702       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3703       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3704       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3705       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3706       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3707       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3708     } else {
3709       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3710     }
3711     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3712   } else {
3713     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3714   }
3715 
3716   /* set values */
3717   ptr_vals = recv_buffer_vals;
3718   ptr_idxs = recv_buffer_idxs_local;
3719   for (i=0;i<n_recvs;i++) {
3720     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3721       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3722       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3723       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3724       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3725       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3726     } else {
3727       /* TODO */
3728     }
3729     ptr_idxs += olengths_idxs[i];
3730     ptr_vals += olengths_vals[i];
3731   }
3732   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3733   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3734   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3735   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3736   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3737   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3738 
3739 #if 0
3740   if (!restrict_comm) { /* check */
3741     Vec       lvec,rvec;
3742     PetscReal infty_error;
3743 
3744     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3745     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3746     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3747     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3748     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3749     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3750     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3751     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3752     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3753   }
3754 #endif
3755 
3756   /* assemble new additional is (if any) */
3757   if (nis) {
3758     PetscInt **temp_idxs,*count_is,j,psum;
3759 
3760     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3761     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3762     ptr_idxs = recv_buffer_idxs_is;
3763     psum = 0;
3764     for (i=0;i<n_recvs;i++) {
3765       for (j=0;j<nis;j++) {
3766         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3767         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3768         psum += plen;
3769         ptr_idxs += plen+1; /* shift pointer to received data */
3770       }
3771     }
3772     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3773     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3774     for (i=1;i<nis;i++) {
3775       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3776     }
3777     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3778     ptr_idxs = recv_buffer_idxs_is;
3779     for (i=0;i<n_recvs;i++) {
3780       for (j=0;j<nis;j++) {
3781         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3782         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3783         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3784         ptr_idxs += plen+1; /* shift pointer to received data */
3785       }
3786     }
3787     for (i=0;i<nis;i++) {
3788       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3789       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3790       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3791     }
3792     ierr = PetscFree(count_is);CHKERRQ(ierr);
3793     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3794     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3795   }
3796   /* free workspace */
3797   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3798   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3799   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3800   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3801   if (isdense) {
3802     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3803     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3804   } else {
3805     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3806   }
3807   if (nis) {
3808     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3809     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3810   }
3811   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3812   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3813   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3814   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3815   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3816   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3817   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3818   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3819   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3820   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3821   ierr = PetscFree(onodes);CHKERRQ(ierr);
3822   if (nis) {
3823     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3824     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3825     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3826   }
3827   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3828   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3829     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3830     for (i=0;i<nis;i++) {
3831       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3832     }
3833   }
3834   PetscFunctionReturn(0);
3835 }
3836 
3837 /* temporary hack into ksp private data structure */
3838 #include <petsc-private/kspimpl.h>
3839 
3840 #undef __FUNCT__
3841 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3842 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3843 {
3844   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3845   PC_IS                  *pcis = (PC_IS*)pc->data;
3846   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3847   MatNullSpace           CoarseNullSpace=NULL;
3848   ISLocalToGlobalMapping coarse_islg;
3849   IS                     coarse_is,*isarray;
3850   PetscInt               i,im_active=-1,active_procs=-1;
3851   PetscInt               nis,nisdofs,nisneu;
3852   PC                     pc_temp;
3853   PCType                 coarse_pc_type;
3854   KSPType                coarse_ksp_type;
3855   PetscBool              multilevel_requested,multilevel_allowed;
3856   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3857   Mat                    t_coarse_mat_is;
3858   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3859   PetscMPIInt            all_procs;
3860   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3861   PetscBool              compute_vecs = PETSC_FALSE;
3862   PetscScalar            *array;
3863   PetscErrorCode         ierr;
3864 
3865   PetscFunctionBegin;
3866   /* Assign global numbering to coarse dofs */
3867   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3868     compute_vecs = PETSC_TRUE;
3869     PetscInt ocoarse_size;
3870     ocoarse_size = pcbddc->coarse_size;
3871     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3872     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3873     /* see if we can avoid some work */
3874     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3875       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3876         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3877         coarse_reuse = PETSC_FALSE;
3878       } else { /* we can safely reuse already computed coarse matrix */
3879         coarse_reuse = PETSC_TRUE;
3880       }
3881     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3882       coarse_reuse = PETSC_FALSE;
3883     }
3884     /* reset any subassembling information */
3885     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3886     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3887   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3888     coarse_reuse = PETSC_TRUE;
3889   }
3890 
3891   /* count "active" (i.e. with positive local size) and "void" processes */
3892   im_active = !!(pcis->n);
3893   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3894   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3895   void_procs = all_procs-active_procs;
3896   csin_type_simple = PETSC_TRUE;
3897   redist = PETSC_FALSE;
3898   if (pcbddc->current_level && void_procs) {
3899     csin_ml = PETSC_TRUE;
3900     ncoarse_ml = void_procs;
3901     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3902     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3903       csin_ds = PETSC_TRUE;
3904       ncoarse_ds = pcbddc->redistribute_coarse;
3905       redist = PETSC_TRUE;
3906     } else {
3907       csin_ds = PETSC_TRUE;
3908       ncoarse_ds = active_procs;
3909       redist = PETSC_TRUE;
3910     }
3911   } else {
3912     csin_ml = PETSC_FALSE;
3913     ncoarse_ml = all_procs;
3914     if (void_procs) {
3915       csin_ds = PETSC_TRUE;
3916       ncoarse_ds = void_procs;
3917       csin_type_simple = PETSC_FALSE;
3918     } else {
3919       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3920         csin_ds = PETSC_TRUE;
3921         ncoarse_ds = pcbddc->redistribute_coarse;
3922         redist = PETSC_TRUE;
3923       } else {
3924         csin_ds = PETSC_FALSE;
3925         ncoarse_ds = all_procs;
3926       }
3927     }
3928   }
3929 
3930   /*
3931     test if we can go multilevel: three conditions must be satisfied:
3932     - we have not exceeded the number of levels requested
3933     - we can actually subassemble the active processes
3934     - we can find a suitable number of MPI processes where we can place the subassembled problem
3935   */
3936   multilevel_allowed = PETSC_FALSE;
3937   multilevel_requested = PETSC_FALSE;
3938   if (pcbddc->current_level < pcbddc->max_levels) {
3939     multilevel_requested = PETSC_TRUE;
3940     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3941       multilevel_allowed = PETSC_FALSE;
3942     } else {
3943       multilevel_allowed = PETSC_TRUE;
3944     }
3945   }
3946   /* determine number of process partecipating to coarse solver */
3947   if (multilevel_allowed) {
3948     ncoarse = ncoarse_ml;
3949     csin = csin_ml;
3950   } else {
3951     ncoarse = ncoarse_ds;
3952     csin = csin_ds;
3953   }
3954 
3955   /* creates temporary l2gmap and IS for coarse indexes */
3956   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3957   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3958 
3959   /* creates temporary MATIS object for coarse matrix */
3960   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3961   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3962   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3963   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3964 #if 0
3965   {
3966     PetscViewer viewer;
3967     char filename[256];
3968     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3969     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3970     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3971     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3972     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3973   }
3974 #endif
3975   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3976   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3977   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3978   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3979   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3980 
3981   /* compute dofs splitting and neumann boundaries for coarse dofs */
3982   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3983     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3984     const PetscInt         *idxs;
3985     ISLocalToGlobalMapping tmap;
3986 
3987     /* create map between primal indices (in local representative ordering) and local primal numbering */
3988     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3989     /* allocate space for temporary storage */
3990     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3991     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3992     /* allocate for IS array */
3993     nisdofs = pcbddc->n_ISForDofsLocal;
3994     nisneu = !!pcbddc->NeumannBoundariesLocal;
3995     nis = nisdofs + nisneu;
3996     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3997     /* dofs splitting */
3998     for (i=0;i<nisdofs;i++) {
3999       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4000       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4001       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4002       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4003       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4004       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4005       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4006       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4007     }
4008     /* neumann boundaries */
4009     if (pcbddc->NeumannBoundariesLocal) {
4010       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4011       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4012       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4013       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4014       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4015       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4016       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4017       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4018     }
4019     /* free memory */
4020     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4021     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4022     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4023   } else {
4024     nis = 0;
4025     nisdofs = 0;
4026     nisneu = 0;
4027     isarray = NULL;
4028   }
4029   /* destroy no longer needed map */
4030   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4031 
4032   /* restrict on coarse candidates (if needed) */
4033   coarse_mat_is = NULL;
4034   if (csin) {
4035     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4036       if (redist) {
4037         PetscMPIInt rank;
4038         PetscInt    spc,n_spc_p1,dest[1],destsize;
4039 
4040         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4041         spc = active_procs/pcbddc->redistribute_coarse;
4042         n_spc_p1 = active_procs%pcbddc->redistribute_coarse;
4043         if (im_active) {
4044           destsize = 1;
4045           if (rank > n_spc_p1*(spc+1)-1) {
4046             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4047           } else {
4048             dest[0] = rank/(spc+1);
4049           }
4050         } else {
4051           destsize = 0;
4052         }
4053         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4054       } else if (csin_type_simple) {
4055         PetscMPIInt rank;
4056         PetscInt    issize,isidx;
4057 
4058         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4059         if (im_active) {
4060           issize = 1;
4061           isidx = (PetscInt)rank;
4062         } else {
4063           issize = 0;
4064           isidx = -1;
4065         }
4066         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4067       } else { /* get a suitable subassembling pattern from MATIS code */
4068         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4069       }
4070 
4071       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4072       if (!redist || ncoarse <= void_procs) {
4073         PetscInt ncoarse_cand,tissize,*nisindices;
4074         PetscInt *coarse_candidates;
4075         const PetscInt* tisindices;
4076 
4077         /* get coarse candidates' ranks in pc communicator */
4078         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4079         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4080         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4081           if (!coarse_candidates[i]) {
4082             coarse_candidates[ncoarse_cand++]=i;
4083           }
4084         }
4085         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4086 
4087 
4088         if (pcbddc->dbg_flag) {
4089           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4090           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4091           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4092           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4093           for (i=0;i<ncoarse_cand;i++) {
4094             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4095           }
4096           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4097           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4098         }
4099         /* shift the pattern on coarse candidates */
4100         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4101         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4102         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4103         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4104         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4105         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4106         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4107       }
4108       if (pcbddc->dbg_flag) {
4109         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4110         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4111         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4112         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4113       }
4114     }
4115     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4116     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4117   } else {
4118     if (pcbddc->dbg_flag) {
4119       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4120       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4121       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4122     }
4123     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4124     coarse_mat_is = t_coarse_mat_is;
4125   }
4126 
4127   /* create local to global scatters for coarse problem */
4128   if (compute_vecs) {
4129     PetscInt lrows;
4130     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4131     if (coarse_mat_is) {
4132       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4133     } else {
4134       lrows = 0;
4135     }
4136     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4137     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4138     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4139     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4140     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4141   }
4142   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4143   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4144 
4145   /* set defaults for coarse KSP and PC */
4146   if (multilevel_allowed) {
4147     coarse_ksp_type = KSPRICHARDSON;
4148     coarse_pc_type = PCBDDC;
4149   } else {
4150     coarse_ksp_type = KSPPREONLY;
4151     coarse_pc_type = PCREDUNDANT;
4152   }
4153 
4154   /* print some info if requested */
4155   if (pcbddc->dbg_flag) {
4156     if (!multilevel_allowed) {
4157       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4158       if (multilevel_requested) {
4159         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4160       } else if (pcbddc->max_levels) {
4161         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4162       }
4163       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4164     }
4165   }
4166 
4167   /* create the coarse KSP object only once with defaults */
4168   if (coarse_mat_is) {
4169     MatReuse coarse_mat_reuse;
4170     PetscViewer dbg_viewer = NULL;
4171     if (pcbddc->dbg_flag) {
4172       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4173       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4174     }
4175     if (!pcbddc->coarse_ksp) {
4176       char prefix[256],str_level[16];
4177       size_t len;
4178       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4179       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4180       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4181       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4182       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4183       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4184       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4185       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4186       /* prefix */
4187       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4188       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4189       if (!pcbddc->current_level) {
4190         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4191         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4192       } else {
4193         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4194         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4195         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4196         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4197         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4198         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4199       }
4200       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4201       /* allow user customization */
4202       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4203       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4204     }
4205 
4206     /* get some info after set from options */
4207     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4208     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4209     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4210     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4211     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4212       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4213       isbddc = PETSC_FALSE;
4214     }
4215     if (isredundant) {
4216       KSP inner_ksp;
4217       PC inner_pc;
4218       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4219       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4220       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4221     }
4222 
4223     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4224     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4225     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4226     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4227     if (nisdofs) {
4228       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4229       for (i=0;i<nisdofs;i++) {
4230         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4231       }
4232     }
4233     if (nisneu) {
4234       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4235       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4236     }
4237 
4238     /* assemble coarse matrix */
4239     if (coarse_reuse) {
4240       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4241       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4242       coarse_mat_reuse = MAT_REUSE_MATRIX;
4243     } else {
4244       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4245     }
4246     if (isbddc || isnn) {
4247       if (pcbddc->coarsening_ratio > 1) {
4248         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4249           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4250           if (pcbddc->dbg_flag) {
4251             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4252             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4253             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4254             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4255           }
4256         }
4257         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4258       } else {
4259         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4260         coarse_mat = coarse_mat_is;
4261       }
4262     } else {
4263       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4264     }
4265     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4266 
4267     /* propagate symmetry info to coarse matrix */
4268     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4269     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4270 
4271     /* set operators */
4272     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4273     if (pcbddc->dbg_flag) {
4274       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4275     }
4276   } else { /* processes non partecipating to coarse solver (if any) */
4277     coarse_mat = 0;
4278   }
4279   ierr = PetscFree(isarray);CHKERRQ(ierr);
4280 #if 0
4281   {
4282     PetscViewer viewer;
4283     char filename[256];
4284     sprintf(filename,"coarse_mat.m");
4285     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4286     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4287     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4288     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4289   }
4290 #endif
4291 
4292   /* Compute coarse null space (special handling by BDDC only) */
4293   if (pcbddc->NullSpace) {
4294     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4295   }
4296 
4297   if (pcbddc->coarse_ksp) {
4298     Vec crhs,csol;
4299     PetscBool ispreonly;
4300     if (CoarseNullSpace) {
4301       if (isbddc) {
4302         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4303       } else {
4304         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4305       }
4306     }
4307     /* setup coarse ksp */
4308     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4309     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4310     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4311     /* hack */
4312     if (!csol) {
4313       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4314     }
4315     if (!crhs) {
4316       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4317     }
4318     /* Check coarse problem if in debug mode or if solving with an iterative method */
4319     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4320     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4321       KSP       check_ksp;
4322       KSPType   check_ksp_type;
4323       PC        check_pc;
4324       Vec       check_vec,coarse_vec;
4325       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4326       PetscInt  its;
4327       PetscBool compute_eigs;
4328       PetscReal *eigs_r,*eigs_c;
4329       PetscInt  neigs;
4330       const char *prefix;
4331 
4332       /* Create ksp object suitable for estimation of extreme eigenvalues */
4333       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4334       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4335       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4336       if (ispreonly) {
4337         check_ksp_type = KSPPREONLY;
4338         compute_eigs = PETSC_FALSE;
4339       } else {
4340         check_ksp_type = KSPGMRES;
4341         compute_eigs = PETSC_TRUE;
4342       }
4343       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4344       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4345       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4346       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4347       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4348       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4349       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4350       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4351       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4352       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4353       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4354       /* create random vec */
4355       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4356       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4357       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4358       if (CoarseNullSpace) {
4359         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4360       }
4361       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4362       /* solve coarse problem */
4363       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4364       if (CoarseNullSpace) {
4365         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4366       }
4367       /* set eigenvalue estimation if preonly has not been requested */
4368       if (compute_eigs) {
4369         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4370         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4371         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4372         lambda_max = eigs_r[neigs-1];
4373         lambda_min = eigs_r[0];
4374         if (pcbddc->use_coarse_estimates) {
4375           if (lambda_max>lambda_min) {
4376             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4377             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4378           }
4379         }
4380       }
4381 
4382       /* check coarse problem residual error */
4383       if (pcbddc->dbg_flag) {
4384         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4385         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4386         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4387         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4388         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4389         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4390         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4391         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4392         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4393         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4394         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4395         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4396         if (compute_eigs) {
4397           PetscReal lambda_max_s,lambda_min_s;
4398           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4399           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4400           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4401           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4402           for (i=0;i<neigs;i++) {
4403             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4404           }
4405         }
4406         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4407         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4408       }
4409       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4410       if (compute_eigs) {
4411         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4412         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4413       }
4414     }
4415   }
4416   /* print additional info */
4417   if (pcbddc->dbg_flag) {
4418     /* waits until all processes reaches this point */
4419     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4420     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4421     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4422   }
4423 
4424   /* free memory */
4425   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4426   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4427   PetscFunctionReturn(0);
4428 }
4429 
4430 #undef __FUNCT__
4431 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4432 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4433 {
4434   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4435   PC_IS*         pcis = (PC_IS*)pc->data;
4436   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4437   PetscInt       i,coarse_size;
4438   PetscInt       *local_primal_indices;
4439   PetscErrorCode ierr;
4440 
4441   PetscFunctionBegin;
4442   /* Compute global number of coarse dofs */
4443   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4444     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4445   }
4446   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4447 
4448   /* check numbering */
4449   if (pcbddc->dbg_flag) {
4450     PetscScalar coarsesum,*array;
4451     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4452 
4453     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4454     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4455     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4456     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4457     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4458     for (i=0;i<pcbddc->local_primal_size;i++) {
4459       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4460     }
4461     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4462     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4463     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4464     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4465     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4466     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4467     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4468     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4469     for (i=0;i<pcis->n;i++) {
4470       if (array[i] == 1.0) {
4471         set_error = PETSC_TRUE;
4472         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4473       }
4474     }
4475     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4476     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4477     for (i=0;i<pcis->n;i++) {
4478       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4479     }
4480     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4481     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4482     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4483     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4484     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4485     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4486     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4487       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4488       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4489       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4490       for (i=0;i<pcbddc->local_primal_size;i++) {
4491         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4492       }
4493       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4494     }
4495     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4496     if (set_error_reduced) {
4497       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4498     }
4499   }
4500   /* get back data */
4501   *coarse_size_n = coarse_size;
4502   *local_primal_indices_n = local_primal_indices;
4503   PetscFunctionReturn(0);
4504 }
4505 
4506 #undef __FUNCT__
4507 #define __FUNCT__ "PCBDDCGlobalToLocal"
4508 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4509 {
4510   IS             localis_t;
4511   PetscInt       i,lsize,*idxs,n;
4512   PetscScalar    *vals;
4513   PetscErrorCode ierr;
4514 
4515   PetscFunctionBegin;
4516   /* get indices in local ordering exploiting local to global map */
4517   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4518   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4519   for (i=0;i<lsize;i++) vals[i] = 1.0;
4520   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4521   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4522   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4523   if (idxs) { /* multilevel guard */
4524     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4525   }
4526   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4527   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4528   ierr = PetscFree(vals);CHKERRQ(ierr);
4529   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4530   /* now compute set in local ordering */
4531   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4532   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4533   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4534   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4535   for (i=0,lsize=0;i<n;i++) {
4536     if (PetscRealPart(vals[i]) > 0.5) {
4537       lsize++;
4538     }
4539   }
4540   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4541   for (i=0,lsize=0;i<n;i++) {
4542     if (PetscRealPart(vals[i]) > 0.5) {
4543       idxs[lsize++] = i;
4544     }
4545   }
4546   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4547   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4548   *localis = localis_t;
4549   PetscFunctionReturn(0);
4550 }
4551 
4552 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4553 #undef __FUNCT__
4554 #define __FUNCT__ "PCBDDCMatMult_Private"
4555 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4556 {
4557   PCBDDCChange_ctx change_ctx;
4558   PetscErrorCode   ierr;
4559 
4560   PetscFunctionBegin;
4561   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4562   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4563   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4564   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4565   PetscFunctionReturn(0);
4566 }
4567 
4568 #undef __FUNCT__
4569 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4570 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4571 {
4572   PCBDDCChange_ctx change_ctx;
4573   PetscErrorCode   ierr;
4574 
4575   PetscFunctionBegin;
4576   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4577   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4578   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4579   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4580   PetscFunctionReturn(0);
4581 }
4582 
4583 #undef __FUNCT__
4584 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4585 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4586 {
4587   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4588   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4589   PetscInt            *used_xadj,*used_adjncy;
4590   PetscBool           free_used_adj;
4591   PetscErrorCode      ierr;
4592 
4593   PetscFunctionBegin;
4594   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4595   free_used_adj = PETSC_FALSE;
4596   if (pcbddc->sub_schurs_layers == -1) {
4597     used_xadj = NULL;
4598     used_adjncy = NULL;
4599   } else {
4600     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4601       used_xadj = pcbddc->mat_graph->xadj;
4602       used_adjncy = pcbddc->mat_graph->adjncy;
4603     } else if (pcbddc->computed_rowadj) {
4604       used_xadj = pcbddc->mat_graph->xadj;
4605       used_adjncy = pcbddc->mat_graph->adjncy;
4606     } else {
4607       PetscBool      flg_row=PETSC_FALSE;
4608       const PetscInt *xadj,*adjncy;
4609       PetscInt       nvtxs;
4610 
4611       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4612       if (flg_row) {
4613         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4614         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4615         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4616         free_used_adj = PETSC_TRUE;
4617       } else {
4618         pcbddc->sub_schurs_layers = -1;
4619         used_xadj = NULL;
4620         used_adjncy = NULL;
4621       }
4622       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4623     }
4624   }
4625   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4626 
4627   /* free adjacency */
4628   if (free_used_adj) {
4629     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4630   }
4631   PetscFunctionReturn(0);
4632 }
4633 
4634 #undef __FUNCT__
4635 #define __FUNCT__ "PCBDDCInitSubSchurs"
4636 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4637 {
4638   PC_IS               *pcis=(PC_IS*)pc->data;
4639   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4640   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4641   PCBDDCGraph         graph;
4642   Mat                 S_j;
4643   PetscErrorCode      ierr;
4644 
4645   PetscFunctionBegin;
4646   /* attach interface graph for determining subsets */
4647   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4648     IS verticesIS;
4649 
4650     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4651     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4652     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4653     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4654     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4655     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4656 /*
4657     if (pcbddc->dbg_flag) {
4658       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4659     }
4660 */
4661   } else {
4662     graph = pcbddc->mat_graph;
4663   }
4664 
4665   /* Create Schur complement matrix */
4666   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4667   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4668 
4669   /* sub_schurs init */
4670   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4671   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4672   /* free graph struct */
4673   if (pcbddc->sub_schurs_rebuild) {
4674     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4675   }
4676   PetscFunctionReturn(0);
4677 }
4678