xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 16f15bc4fcd020d26c664336e6d9f4fa42602adb)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscBool       symmetric;
26   PetscErrorCode  ierr;
27 
28   PetscFunctionBegin;
29   if (!sub_schurs->use_mumps) {
30     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
31   }
32   ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr);
33   if (!symmetric) {
34     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
35   }
36 
37   /* max size of subsets */
38   mss = 0;
39   for (i=0;i<sub_schurs->n_subs;i++) {
40     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
41       PetscInt subset_size;
42       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
43       mss = PetscMax(mss,subset_size);
44     }
45   }
46 
47   /* min/max and threshold */
48   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
49   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
50   nmax = PetscMax(nmin,nmax);
51   if (pcbddc->adaptive_threshold > 1.0) {
52     thresh = 1.0/pcbddc->adaptive_threshold;
53   } else {
54     thresh = 1.0;
55   }
56 
57   /* allocate lapack workspace */
58   cum = cum2 = 0;
59   maxneigs = 0;
60   for (i=0;i<sub_schurs->n_subs;i++) {
61     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
62       PetscInt n,subset_size;
63       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
64       n = PetscMin(subset_size,nmax);
65       cum += subset_size*n;
66       cum2 += n;
67       maxneigs = PetscMax(maxneigs,n);
68     }
69   }
70 
71   if (mss) {
72     if (symmetric) {
73       PetscBLASInt B_itype = 1;
74       PetscBLASInt B_N = mss;
75       PetscScalar  zero = 0.0;
76       PetscScalar  eps = 0.0; /* dlamch? */
77 
78       B_lwork = -1;
79       S = NULL;
80       St = NULL;
81       eigs = NULL;
82       eigv = NULL;
83       B_iwork = NULL;
84       B_ifail = NULL;
85       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
86 #if defined(PETSC_USE_COMPLEX)
87       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
88 #else
89       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
90 #endif
91       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
92       ierr = PetscFPTrapPop();CHKERRQ(ierr);
93     } else {
94         /* TODO */
95     }
96   } else {
97     lwork = 0;
98   }
99 
100   nv = 0;
101   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
102     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
103   }
104   ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr);
105   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
106                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
107 #if defined(PETSC_USE_COMPLEX)
108   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
109 #endif
110   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
111   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
112                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
113                       nv+cum,&pcbddc->adaptive_constraints_idxs,
114                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
115   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
116 
117   maxneigs = 0;
118   cum = cum2 = cumarray = 0;
119   if (sub_schurs->is_Ej_com) {
120     const PetscInt *idxs;
121 
122     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
123     for (cum=0;cum<nv;cum++) {
124       pcbddc->adaptive_constraints_n[cum] = 1;
125       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
126       pcbddc->adaptive_constraints_ptrs[cum] = cum;
127       pcbddc->adaptive_constraints_data[cum] = 1.0;
128     }
129     cum2 = cum;
130     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
131   }
132 
133   if (mss) { /* multilevel */
134     if (pcbddc->use_deluxe_scaling) {
135       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
136     } else {
137       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
138     }
139     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
140   }
141 
142   for (i=0;i<sub_schurs->n_subs;i++) {
143     PetscInt j,subset_size;
144 
145     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
146     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
147       const PetscInt *idxs;
148       PetscScalar    one = 1.0,zero=0.0;
149       PetscBLASInt   B_N;
150 
151       /* S should be copied since we need it for deluxe scaling */
152       if (symmetric) {
153         PetscInt j;
154         for (j=0;j<subset_size;j++) {
155           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
156         }
157         for (j=0;j<subset_size;j++) {
158           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
159         }
160       } else {
161         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
162         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
163       }
164       /* is always this the right matrix? */
165       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
166 
167       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
168       /* St = Starray+cumarray; */
169 
170       if (symmetric) {
171         PetscBLASInt B_itype = 1;
172         PetscBLASInt B_IL = 1, B_IU;
173         PetscScalar  eps = -1.0; /* dlamch? */
174         PetscInt     nmin_s;
175 
176         /* ask for eigenvalues lower than thresh */
177         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
178         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
179         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
180 #if defined(PETSC_USE_COMPLEX)
181         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
182 #else
183         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
184 #endif
185         ierr = PetscFPTrapPop();CHKERRQ(ierr);
186         if (B_ierr) {
187           if (B_ierr < 0 ) {
188             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
189           } else if (B_ierr <= B_N) {
190             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
191           } else {
192             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
193           }
194         }
195 
196         if (B_neigs > nmax) {
197           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax);
198           B_neigs = nmax;
199         }
200 
201         nmin_s = PetscMin(nmin,B_N);
202         if (B_neigs < nmin_s) {
203           PetscBLASInt B_neigs2;
204 
205           B_IL = B_neigs + 1;
206           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
207           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
208           if (symmetric) {
209             PetscInt j;
210             for (j=0;j<subset_size;j++) {
211               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
212             }
213             for (j=0;j<subset_size;j++) {
214               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
215             }
216           } else {
217             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
218             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
219           }
220           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
221 #if defined(PETSC_USE_COMPLEX)
222           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
223 #else
224           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
225 #endif
226           ierr = PetscFPTrapPop();CHKERRQ(ierr);
227           B_neigs += B_neigs2;
228         }
229         if (B_ierr) {
230           if (B_ierr < 0 ) {
231             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
232           } else if (B_ierr <= B_N) {
233             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
234           } else {
235             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
236           }
237         }
238         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
239         for (j=0;j<B_neigs;j++) {
240           if (eigs[j] == 0.0) {
241             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
242           } else {
243             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
244           }
245         }
246       } else {
247           /* TODO */
248       }
249       maxneigs = PetscMax(B_neigs,maxneigs);
250       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
251 
252       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
253       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N));
254       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
255 #if 0
256       PetscInt ii;
257       for (ii=0;ii<B_neigs;ii++) {
258         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N);
259         for (j=0;j<B_N;j++) {
260           PetscPrintf(PETSC_COMM_SELF,"[%d]       %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]);
261         }
262       }
263 #endif
264       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
265       for (j=0;j<B_neigs;j++) {
266 #if 0
267         {
268           PetscBLASInt Blas_N,Blas_one = 1.0;
269           PetscScalar norm;
270           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
271           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
272           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
273             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
274           } else {
275             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
276           }
277           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
278         }
279 #endif
280         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
281         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
282         cum2 += subset_size;
283       }
284       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
285     }
286     /* shift for next computation */
287     cumarray += subset_size*subset_size;
288   }
289   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
290   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
291 
292   if (mss) {
293     if (pcbddc->use_deluxe_scaling) {
294       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
295     } else {
296       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
297     }
298     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
299   }
300   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
301 #if defined(PETSC_USE_COMPLEX)
302   ierr = PetscFree(rwork);CHKERRQ(ierr);
303 #endif
304   if (pcbddc->dbg_flag) {
305     PetscInt maxneigs_r;
306     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
307     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
308   }
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "PCBDDCSetUpSolvers"
314 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
315 {
316   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
317   PetscScalar    *coarse_submat_vals;
318   PetscErrorCode ierr;
319 
320   PetscFunctionBegin;
321   /* Setup local scatters R_to_B and (optionally) R_to_D */
322   /* PCBDDCSetUpLocalWorkVectors should be called first! */
323   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
324 
325   /* Setup local neumann solver ksp_R */
326   /* PCBDDCSetUpLocalScatters should be called first! */
327   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
328 
329   /* Change global null space passed in by the user if change of basis has been requested */
330   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
331     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
332   }
333 
334   /*
335      Setup local correction and local part of coarse basis.
336      Gives back the dense local part of the coarse matrix in column major ordering
337   */
338   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
339 
340   /* Compute total number of coarse nodes and setup coarse solver */
341   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
342 
343   /* free */
344   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "PCBDDCResetCustomization"
350 PetscErrorCode PCBDDCResetCustomization(PC pc)
351 {
352   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
357   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
358   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
359   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
360   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
361   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
362   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
363   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
364   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
365   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
366   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "PCBDDCResetTopography"
372 PetscErrorCode PCBDDCResetTopography(PC pc)
373 {
374   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
379   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
380   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
381   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
382   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
383   PetscFunctionReturn(0);
384 }
385 
386 #undef __FUNCT__
387 #define __FUNCT__ "PCBDDCResetSolvers"
388 PetscErrorCode PCBDDCResetSolvers(PC pc)
389 {
390   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
391   PetscScalar    *array;
392   PetscErrorCode ierr;
393 
394   PetscFunctionBegin;
395   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
396   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
397   ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
398   ierr = PetscFree(array);CHKERRQ(ierr);
399   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
400   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
403   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
404   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
405   if (pcbddc->local_auxmat2) {
406     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
407     ierr = PetscFree(array);CHKERRQ(ierr);
408   }
409   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
410   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
411   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
412   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
413   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
414   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
415   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
416   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
417   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
418   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
419   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
420   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
421   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
422   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
423   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
424   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
431 {
432   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
433   PC_IS          *pcis = (PC_IS*)pc->data;
434   VecType        impVecType;
435   PetscInt       n_constraints,n_R,old_size;
436   PetscErrorCode ierr;
437 
438   PetscFunctionBegin;
439   if (!pcbddc->ConstraintMatrix) {
440     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
441   }
442   /* get sizes */
443   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
444   n_R = pcis->n-pcbddc->n_actual_vertices;
445   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
446   /* local work vectors (try to avoid unneeded work)*/
447   /* R nodes */
448   old_size = -1;
449   if (pcbddc->vec1_R) {
450     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
451   }
452   if (n_R != old_size) {
453     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
454     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
455     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
456     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
457     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
458     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
459   }
460   /* local primal dofs */
461   old_size = -1;
462   if (pcbddc->vec1_P) {
463     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
464   }
465   if (pcbddc->local_primal_size != old_size) {
466     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
467     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
468     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
469     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
470   }
471   /* local explicit constraints */
472   old_size = -1;
473   if (pcbddc->vec1_C) {
474     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
475   }
476   if (n_constraints && n_constraints != old_size) {
477     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
481   }
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "PCBDDCSetUpCorrection"
487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
488 {
489   PetscErrorCode         ierr;
490   /* pointers to pcis and pcbddc */
491   PC_IS*                 pcis = (PC_IS*)pc->data;
492   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
493   /* submatrices of local problem */
494   Mat                    A_RV,A_VR,A_VV;
495   /* submatrices of local coarse problem */
496   Mat                    S_VV,S_CV,S_VC,S_CC;
497   /* working matrices */
498   Mat                    C_CR;
499   /* additional working stuff */
500   PC                     pc_R;
501   Mat                    F;
502   PetscBool              isLU,isCHOL,isILU;
503 
504   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
505   PetscScalar            *work;
506   PetscInt               *idx_V_B;
507   PetscInt               n,n_vertices,n_constraints;
508   PetscInt               i,n_R,n_D,n_B;
509   PetscBool              unsymmetric_check;
510   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
511   MatType                impMatType;
512   /* some shortcuts to scalars */
513   PetscScalar            one=1.0,m_one=-1.0;
514 
515   PetscFunctionBegin;
516   /* get number of vertices (corners plus constraints with change of basis)
517      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
518   n_vertices = pcbddc->n_actual_vertices;
519   n_constraints = pcbddc->local_primal_size-n_vertices;
520   /* Set Non-overlapping dimensions */
521   n_B = pcis->n_B; n_D = pcis->n - n_B;
522   n_R = pcis->n-n_vertices;
523 
524   /* Set types for local objects needed by BDDC precondtioner */
525   impMatType = MATSEQDENSE;
526 
527   /* vertices in boundary numbering */
528   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
529   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
530   if (i != n_vertices) {
531     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
532   }
533 
534   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
535   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
536   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
537   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
538   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
539   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
540   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
541   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
542   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
543   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
544 
545   unsymmetric_check = PETSC_FALSE;
546   /* allocate workspace */
547   n = 0;
548   if (n_constraints) {
549     n += n_R*n_constraints;
550   }
551   if (n_vertices) {
552     n = PetscMax(2*n_R*n_vertices,n);
553   }
554   if (!pcbddc->issym) {
555     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
556     unsymmetric_check = PETSC_TRUE;
557   }
558   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
559 
560   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
561   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
562   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
563   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
564   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
565   if (isLU || isILU || isCHOL) {
566     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
567   } else {
568     F = NULL;
569   }
570 
571   /* Precompute stuffs needed for preprocessing and application of BDDC*/
572   if (n_constraints) {
573     Mat M1,M2,M3;
574     IS  is_aux;
575     /* see if we can save some allocations */
576     if (pcbddc->local_auxmat2) {
577       PetscInt on_R,on_constraints;
578       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
579       if (on_R != n_R || on_constraints != n_constraints) {
580         PetscScalar *marray;
581 
582         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
583         ierr = PetscFree(marray);CHKERRQ(ierr);
584         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
585         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
586       }
587     }
588     /* auxiliary matrices */
589     if (!pcbddc->local_auxmat2) {
590       PetscScalar *marray;
591 
592       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
593       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
594       marray += n_R*n_constraints;
595       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
596     }
597 
598     /* Extract constraints on R nodes: C_{CR}  */
599     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
600     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
601     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
602 
603     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
604     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
605     for (i=0;i<n_constraints;i++) {
606       const PetscScalar *row_cmat_values;
607       const PetscInt    *row_cmat_indices;
608       PetscInt          size_of_constraint,j;
609 
610       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
611       for (j=0;j<size_of_constraint;j++) {
612         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
613       }
614       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
615     }
616     if (F) {
617       Mat B;
618 
619       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
620       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
621       ierr = MatDestroy(&B);CHKERRQ(ierr);
622     } else {
623       PetscScalar *xarray;
624       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
625       for (i=0;i<n_constraints;i++) {
626         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
627         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
628         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
629         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
630         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
631       }
632       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
633     }
634 
635     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
636     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
637     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
638     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
639     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
640     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
641     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
642     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
643     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
644     ierr = MatDestroy(&M2);CHKERRQ(ierr);
645     ierr = MatDestroy(&M3);CHKERRQ(ierr);
646     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
647     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
648     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
649     ierr = MatDestroy(&M1);CHKERRQ(ierr);
650   }
651   /* Get submatrices from subdomain matrix */
652   if (n_vertices) {
653     Mat       newmat;
654     IS        is_aux;
655     PetscInt  ibs,mbs;
656     PetscBool issbaij;
657 
658     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
659     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
660     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
661     if (ibs != mbs) { /* need to convert to SEQAIJ */
662       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
663       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
664       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
665       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
666       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
667     } else {
668       /* this is safe */
669       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
670       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
671       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
672         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
673         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
674         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
675         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
676       } else {
677         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
678         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
679       }
680     }
681     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
682   }
683 
684   /* Matrix of coarse basis functions (local) */
685   if (pcbddc->coarse_phi_B) {
686     PetscInt on_B,on_primal,on_D=n_D;
687     if (pcbddc->coarse_phi_D) {
688       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
689     }
690     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
691     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
692       PetscScalar *marray;
693 
694       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
695       ierr = PetscFree(marray);CHKERRQ(ierr);
696       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
697       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
698       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
699       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
700     }
701   }
702 
703   if (!pcbddc->coarse_phi_B) {
704     PetscScalar *marray;
705 
706     n = n_B*pcbddc->local_primal_size;
707     if (pcbddc->switch_static || pcbddc->dbg_flag) {
708       n += n_D*pcbddc->local_primal_size;
709     }
710     if (!pcbddc->issym) {
711       n *= 2;
712     }
713     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
714     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
715     n = n_B*pcbddc->local_primal_size;
716     if (pcbddc->switch_static || pcbddc->dbg_flag) {
717       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
718       n += n_D*pcbddc->local_primal_size;
719     }
720     if (!pcbddc->issym) {
721       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
722       if (pcbddc->switch_static || pcbddc->dbg_flag) {
723         n = n_B*pcbddc->local_primal_size;
724         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
725       }
726     } else {
727       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
728       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
729       if (pcbddc->switch_static || pcbddc->dbg_flag) {
730         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
731         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
732       }
733     }
734   }
735   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
736   /* vertices */
737   if (n_vertices) {
738 
739     if (n_R) {
740       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
741       PetscBLASInt B_N,B_one = 1;
742       PetscScalar  *x,*y;
743 
744       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
746       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
747       if (F) {
748         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
749       } else {
750         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
751         for (i=0;i<n_vertices;i++) {
752           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
753           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
754           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
755           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
756           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
757         }
758         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
759       }
760       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
761       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
762       if (n_constraints) {
763         Mat B;
764         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
765         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
766         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
767         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
768         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
769         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
770         ierr = MatDestroy(&B);CHKERRQ(ierr);
771       }
772       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
773       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
774       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
775       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
776       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
777       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
778       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
779       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
780       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
781       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
782       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
783       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
784     } else {
785       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
786       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
787     }
788     /* coarse basis functions */
789     for (i=0;i<n_vertices;i++) {
790       PetscScalar *y;
791 
792       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
793       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
794       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
795       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
796       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
797       y[n_B*i+idx_V_B[i]] = 1.0;
798       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
799       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
800 
801       if (pcbddc->switch_static || pcbddc->dbg_flag) {
802         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
803         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
804         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
805         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
806         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
807         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
808       }
809       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
810     }
811     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
812     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
813   }
814 
815   if (n_constraints) {
816     Mat B;
817 
818     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
819     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
820     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
821     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
822     if (n_vertices) {
823       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
824     }
825     ierr = MatDestroy(&B);CHKERRQ(ierr);
826     /* coarse basis functions */
827     for (i=0;i<n_constraints;i++) {
828       PetscScalar *y;
829 
830       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
831       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
832       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
833       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
834       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
836       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
837 
838       if (pcbddc->switch_static || pcbddc->dbg_flag) {
839         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
840         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
841         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
842         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
843         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
844         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
845       }
846       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
847     }
848   }
849 
850   /* compute other basis functions for non-symmetric problems */
851   if (!pcbddc->issym) {
852     Mat B,X;
853 
854     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
855 
856     if (n_constraints) {
857       Mat S_CCT,B_C;
858 
859       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
860       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
861       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
862       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
863       if (n_vertices) {
864         Mat B_V,S_VCT;
865 
866         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
867         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
868         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
869         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
870         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
871       }
872       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
873     }
874     if (n_vertices && n_R) {
875       Mat          A_VRT;
876       PetscBLASInt B_N,B_one = 1;
877 
878       if (!n_constraints) { /* if there are no constraints, reset work */
879         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
880       }
881       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
882       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
883       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
884       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
885       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
886     }
887 
888     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
889     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
890       for (i=0;i<pcbddc->local_primal_size;i++) {
891         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
892         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
893         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
894         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
895         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
896       }
897     } else {
898       for (i=0;i<pcbddc->local_primal_size;i++) {
899         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
900         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
901         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
902         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
903         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
904       }
905     }
906     ierr = MatDestroy(&B);CHKERRQ(ierr);
907     /* coarse basis functions */
908     for (i=0;i<pcbddc->local_primal_size;i++) {
909       PetscScalar *y;
910 
911       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
912       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
913       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
914       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916       if (i<n_vertices) {
917         y[n_B*i+idx_V_B[i]] = 1.0;
918       }
919       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
920       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
921 
922       if (pcbddc->switch_static || pcbddc->dbg_flag) {
923         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
924         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
925         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
927         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
928         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
929       }
930       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
931     }
932     ierr = MatDestroy(&X);CHKERRQ(ierr);
933   }
934 
935   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
936   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
937   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
938   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
939   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
940   /* Checking coarse_sub_mat and coarse basis functios */
941   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
942   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
943   if (pcbddc->dbg_flag) {
944     Mat         coarse_sub_mat;
945     Mat         AUXMAT,TM1,TM2,TM3,TM4;
946     Mat         coarse_phi_D,coarse_phi_B;
947     Mat         coarse_psi_D,coarse_psi_B;
948     Mat         A_II,A_BB,A_IB,A_BI;
949     MatType     checkmattype=MATSEQAIJ;
950     PetscReal   real_value;
951 
952     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
953     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
954     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
955     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
956     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
957     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
958     if (unsymmetric_check) {
959       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
960       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
961     }
962     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
963 
964     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
965     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
966     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
967     if (unsymmetric_check) {
968       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
969       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
970       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
971       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
972       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
973       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
974       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
975       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
976       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
977       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
978       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
979       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
980     } else {
981       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
982       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
983       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
984       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
985       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
986       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
987       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
988       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
989     }
990     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
991     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
992     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
993     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
994     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
995     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
996 
997     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
998     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
999     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1000     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1001     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1002     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1003     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1004     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1005     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1006     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1007     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1008     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1009     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1010     if (unsymmetric_check) {
1011       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1012       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1013     }
1014     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1015   }
1016 
1017   /* free memory */
1018   ierr = PetscFree(work);CHKERRQ(ierr);
1019   if (n_vertices) {
1020     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1021   }
1022   if (n_constraints) {
1023     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1024   }
1025   /* get back data */
1026   *coarse_submat_vals_n = coarse_submat_vals;
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1032 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B)
1033 {
1034   Mat            *work_mat;
1035   IS             isrow_s,iscol_s;
1036   PetscBool      rsorted,csorted;
1037   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1038   PetscErrorCode ierr;
1039 
1040   PetscFunctionBegin;
1041   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1042   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1043   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1044   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1045 
1046   if (!rsorted) {
1047     const PetscInt *idxs;
1048     PetscInt *idxs_sorted,i;
1049 
1050     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1051     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1052     for (i=0;i<rsize;i++) {
1053       idxs_perm_r[i] = i;
1054     }
1055     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1056     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1057     for (i=0;i<rsize;i++) {
1058       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1059     }
1060     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1061     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1062   } else {
1063     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1064     isrow_s = isrow;
1065   }
1066 
1067   if (!csorted) {
1068     if (isrow == iscol) {
1069       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1070       iscol_s = isrow_s;
1071     } else {
1072       const PetscInt *idxs;
1073       PetscInt *idxs_sorted,i;
1074 
1075       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1076       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1077       for (i=0;i<csize;i++) {
1078         idxs_perm_c[i] = i;
1079       }
1080       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1081       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1082       for (i=0;i<csize;i++) {
1083         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1084       }
1085       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1086       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1087     }
1088   } else {
1089     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1090     iscol_s = iscol;
1091   }
1092 
1093   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr);
1094 
1095   if (!rsorted || !csorted) {
1096     Mat      new_mat;
1097     IS       is_perm_r,is_perm_c;
1098 
1099     if (!rsorted) {
1100       PetscInt *idxs_r,i;
1101       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1102       for (i=0;i<rsize;i++) {
1103         idxs_r[idxs_perm_r[i]] = i;
1104       }
1105       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1106       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1107     } else {
1108       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1109     }
1110     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1111 
1112     if (!csorted) {
1113       if (isrow_s == iscol_s) {
1114         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1115         is_perm_c = is_perm_r;
1116       } else {
1117         PetscInt *idxs_c,i;
1118         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1119         for (i=0;i<csize;i++) {
1120           idxs_c[idxs_perm_c[i]] = i;
1121         }
1122         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1123         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1124       }
1125     } else {
1126       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1127     }
1128     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1129 
1130     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1131     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1132     work_mat[0] = new_mat;
1133     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1134     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1135   }
1136 
1137   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1138   *B = work_mat[0];
1139   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1140   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1141   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1147 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1148 {
1149   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1150   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1151   Mat            new_mat;
1152   IS             is_local,is_global;
1153   PetscInt       local_size;
1154   PetscBool      isseqaij;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1159   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1160   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1161   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1162   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1163   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr);
1164   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1165 
1166   /* check */
1167   if (pcbddc->dbg_flag) {
1168     Vec       x,x_change;
1169     PetscReal error;
1170 
1171     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1172     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1173     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1174     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1175     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1176     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1177     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1178     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1179     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1180     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1181     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1182     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1183     ierr = VecDestroy(&x);CHKERRQ(ierr);
1184     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1185   }
1186 
1187   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1188   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1189   if (isseqaij) {
1190     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1191   } else {
1192     Mat work_mat;
1193     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1194     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1195     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1196   }
1197   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1198   /*
1199   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1200   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1201   */
1202   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 #undef __FUNCT__
1207 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1208 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1209 {
1210   PC_IS*         pcis = (PC_IS*)(pc->data);
1211   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1212   IS             is_aux1,is_aux2;
1213   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1214   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1215   PetscInt       vbs,bs;
1216   PetscBT        bitmask;
1217   PetscErrorCode ierr;
1218 
1219   PetscFunctionBegin;
1220   /*
1221     No need to setup local scatters if
1222       - primal space is unchanged
1223         AND
1224       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1225         AND
1226       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1227   */
1228   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1229     PetscFunctionReturn(0);
1230   }
1231   /* destroy old objects */
1232   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1233   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1234   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1235   /* Set Non-overlapping dimensions */
1236   n_B = pcis->n_B; n_D = pcis->n - n_B;
1237   n_vertices = pcbddc->n_actual_vertices;
1238   /* create auxiliary bitmask */
1239   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1240   for (i=0;i<n_vertices;i++) {
1241     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1242   }
1243 
1244   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1245   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1246   for (i=0, n_R=0; i<pcis->n; i++) {
1247     if (!PetscBTLookup(bitmask,i)) {
1248       idx_R_local[n_R] = i;
1249       n_R++;
1250     }
1251   }
1252 
1253   /* Block code */
1254   vbs = 1;
1255   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1256   if (bs>1 && !(n_vertices%bs)) {
1257     PetscBool is_blocked = PETSC_TRUE;
1258     PetscInt  *vary;
1259     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1260     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1261     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1262     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1263     for (i=0; i<n_vertices; i++) {
1264       if (vary[i]!=0 && vary[i]!=bs) {
1265         is_blocked = PETSC_FALSE;
1266         break;
1267       }
1268     }
1269     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1270       vbs = bs;
1271       for (i=0;i<n_R/vbs;i++) {
1272         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1273       }
1274     }
1275     ierr = PetscFree(vary);CHKERRQ(ierr);
1276   }
1277   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1278   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1279 
1280   /* print some info if requested */
1281   if (pcbddc->dbg_flag) {
1282     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1283     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1284     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1285     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1286     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1287     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1288     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1289     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1290   }
1291 
1292   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1293   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1294   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1295   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1296   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1297   for (i=0; i<n_D; i++) {
1298     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1299   }
1300   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1301   for (i=0, j=0; i<n_R; i++) {
1302     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1303       aux_array1[j++] = i;
1304     }
1305   }
1306   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1307   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1308   for (i=0, j=0; i<n_B; i++) {
1309     if (!PetscBTLookup(bitmask,is_indices[i])) {
1310       aux_array2[j++] = i;
1311     }
1312   }
1313   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1314   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1315   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1316   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1317   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1318 
1319   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1320     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1321     for (i=0, j=0; i<n_R; i++) {
1322       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1323         aux_array1[j++] = i;
1324       }
1325     }
1326     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1327     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1328     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1329   }
1330   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1331   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1338 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1339 {
1340   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1341   PC_IS          *pcis = (PC_IS*)pc->data;
1342   PC             pc_temp;
1343   Mat            A_RR;
1344   MatReuse       reuse;
1345   PetscScalar    m_one = -1.0;
1346   PetscReal      value;
1347   PetscInt       n_D,n_R,ibs,mbs;
1348   PetscBool      use_exact,use_exact_reduced,issbaij;
1349   PetscErrorCode ierr;
1350   /* prefixes stuff */
1351   char           dir_prefix[256],neu_prefix[256],str_level[16];
1352   size_t         len;
1353 
1354   PetscFunctionBegin;
1355 
1356   /* compute prefixes */
1357   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1358   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1359   if (!pcbddc->current_level) {
1360     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1361     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1362     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1363     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1364   } else {
1365     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1366     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1367     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1368     len -= 15; /* remove "pc_bddc_coarse_" */
1369     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1370     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1371     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1372     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1373     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1374     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1375     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1376     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1377   }
1378 
1379   /* DIRICHLET PROBLEM */
1380   if (dirichlet) {
1381     /* Matrix for Dirichlet problem is pcis->A_II */
1382     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1383     if (!pcbddc->ksp_D) { /* create object if not yet build */
1384       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1385       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1386       /* default */
1387       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1388       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1389       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1390       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1391       if (issbaij) {
1392         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1393       } else {
1394         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1395       }
1396       /* Allow user's customization */
1397       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1398       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1399     }
1400     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1401     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1402     if (!n_D) {
1403       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1404       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1405     }
1406     /* Set Up KSP for Dirichlet problem of BDDC */
1407     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1408     /* set ksp_D into pcis data */
1409     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1410     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1411     pcis->ksp_D = pcbddc->ksp_D;
1412   }
1413 
1414   /* NEUMANN PROBLEM */
1415   A_RR = 0;
1416   if (neumann) {
1417     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1418     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1419     if (pcbddc->ksp_R) { /* already created ksp */
1420       PetscInt nn_R;
1421       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1422       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1423       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1424       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1425         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1426         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1427         reuse = MAT_INITIAL_MATRIX;
1428       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1429         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1430           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1431           reuse = MAT_INITIAL_MATRIX;
1432         } else { /* safe to reuse the matrix */
1433           reuse = MAT_REUSE_MATRIX;
1434         }
1435       }
1436       /* last check */
1437       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1438         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1439         reuse = MAT_INITIAL_MATRIX;
1440       }
1441     } else { /* first time, so we need to create the matrix */
1442       reuse = MAT_INITIAL_MATRIX;
1443     }
1444     /* extract A_RR */
1445     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1446     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1447     if (ibs != mbs) {
1448       Mat newmat;
1449       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1450       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1451       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1452     } else {
1453       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1454     }
1455     if (!pcbddc->ksp_R) { /* create object if not present */
1456       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1457       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1458       /* default */
1459       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1460       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1461       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1462       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1463       if (issbaij) {
1464         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1465       } else {
1466         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1467       }
1468       /* Allow user's customization */
1469       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1470       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1471     }
1472     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1473     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1474     if (!n_R) {
1475       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1476       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1477     }
1478     /* Set Up KSP for Neumann problem of BDDC */
1479     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1480   }
1481 
1482   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1483   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1484     if (pcbddc->dbg_flag) {
1485       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1486       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1487       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1488     }
1489     if (dirichlet) { /* Dirichlet */
1490       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1491       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1492       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1493       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1494       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1495       /* need to be adapted? */
1496       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1497       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1498       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1499       /* print info */
1500       if (pcbddc->dbg_flag) {
1501         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1502         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1503       }
1504       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1505         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1506       }
1507     }
1508     if (neumann) { /* Neumann */
1509       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1510       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1511       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1512       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1513       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1514       /* need to be adapted? */
1515       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1516       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1517       /* print info */
1518       if (pcbddc->dbg_flag) {
1519         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1520         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1521       }
1522       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1523         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1524       }
1525     }
1526   }
1527   /* free Neumann problem's matrix */
1528   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1534 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1535 {
1536   PetscErrorCode ierr;
1537   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1538 
1539   PetscFunctionBegin;
1540   if (applytranspose) {
1541     if (pcbddc->local_auxmat1) {
1542       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1543       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1544     }
1545     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1546   } else {
1547     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1548     if (pcbddc->local_auxmat1) {
1549       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1550       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1551     }
1552   }
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1557 #undef __FUNCT__
1558 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1559 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1560 {
1561   PetscErrorCode ierr;
1562   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1563   PC_IS*            pcis = (PC_IS*)  (pc->data);
1564   const PetscScalar zero = 0.0;
1565 
1566   PetscFunctionBegin;
1567   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1568   if (applytranspose) {
1569     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1570     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1571   } else {
1572     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1573     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1574   }
1575   /* start communications from local primal nodes to rhs of coarse solver */
1576   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1577   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1578   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579 
1580   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1581   /* TODO remove null space when doing multilevel */
1582   if (pcbddc->coarse_ksp) {
1583     if (applytranspose) {
1584       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1585     } else {
1586       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1587     }
1588   }
1589 
1590   /* Local solution on R nodes */
1591   if (pcis->n) {
1592     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1593     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1594     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1595     if (pcbddc->switch_static) {
1596       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1597       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1598     }
1599     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1600     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1601     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1602     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1603     if (pcbddc->switch_static) {
1604       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606     }
1607   }
1608 
1609   /* communications from coarse sol to local primal nodes */
1610   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1611   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1612 
1613   /* Sum contributions from two levels */
1614   if (applytranspose) {
1615     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1616     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1617   } else {
1618     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1619     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1620   }
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1625 #undef __FUNCT__
1626 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1627 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1628 {
1629   PetscErrorCode ierr;
1630   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1631   PetscScalar    *array,*array2;
1632   Vec            from,to;
1633 
1634   PetscFunctionBegin;
1635   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1636     from = pcbddc->coarse_vec;
1637     to = pcbddc->vec1_P;
1638     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1639       Vec tvec;
1640       PetscInt lsize;
1641       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1642       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1643       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1644       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1645       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1646       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1647       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1648     }
1649   } else { /* from local to global -> put data in coarse right hand side */
1650     from = pcbddc->vec1_P;
1651     to = pcbddc->coarse_vec;
1652   }
1653   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1659 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1660 {
1661   PetscErrorCode ierr;
1662   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1663   PetscScalar    *array,*array2;
1664   Vec            from,to;
1665 
1666   PetscFunctionBegin;
1667   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1668     from = pcbddc->coarse_vec;
1669     to = pcbddc->vec1_P;
1670   } else { /* from local to global -> put data in coarse right hand side */
1671     from = pcbddc->vec1_P;
1672     to = pcbddc->coarse_vec;
1673   }
1674   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1675   if (smode == SCATTER_FORWARD) {
1676     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1677       Vec tvec;
1678       PetscInt lsize;
1679       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1680       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1681       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1682       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1683       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1684       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1685       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1686     }
1687   }
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 /* uncomment for testing purposes */
1692 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1693 #undef __FUNCT__
1694 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1695 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1696 {
1697   PetscErrorCode    ierr;
1698   PC_IS*            pcis = (PC_IS*)(pc->data);
1699   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1700   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1701   /* one and zero */
1702   PetscScalar       one=1.0,zero=0.0;
1703   /* space to store constraints and their local indices */
1704   PetscScalar       *temp_quadrature_constraint;
1705   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1706   /* iterators */
1707   PetscInt          i,j,k,total_counts,temp_start_ptr;
1708   /* stuff to store connected components stored in pcbddc->mat_graph */
1709   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1710   PetscInt          n_ISForFaces,n_ISForEdges;
1711   /* near null space stuff */
1712   MatNullSpace      nearnullsp;
1713   const Vec         *nearnullvecs;
1714   Vec               *localnearnullsp;
1715   PetscBool         nnsp_has_cnst;
1716   PetscInt          nnsp_size;
1717   PetscScalar       *array;
1718   /* BLAS integers */
1719   PetscBLASInt      lwork,lierr;
1720   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1721   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1722   /* LAPACK working arrays for SVD or POD */
1723   PetscBool         skip_lapack;
1724   PetscScalar       *work;
1725   PetscReal         *singular_vals;
1726 #if defined(PETSC_USE_COMPLEX)
1727   PetscReal         *rwork;
1728 #endif
1729 #if defined(PETSC_MISSING_LAPACK_GESVD)
1730   PetscBLASInt      Blas_one_2=1;
1731   PetscScalar       *temp_basis,*correlation_mat;
1732 #else
1733   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1734   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1735 #endif
1736   /* reuse */
1737   PetscInt          olocal_primal_size;
1738   PetscInt          *oprimal_indices_local_idxs;
1739   /* change of basis */
1740   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1741   PetscBool         boolforchange,qr_needed;
1742   PetscBT           touched,change_basis,qr_needed_idx;
1743   /* auxiliary stuff */
1744   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1745   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1746   PetscScalar       *temp_quadrature_constraint_work=NULL;
1747   /* some quantities */
1748   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1749   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1750 
1751 
1752   PetscFunctionBegin;
1753   /* Destroy Mat objects computed previously */
1754   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1755   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1756   /* TODO synch with adaptive selection */
1757   /* Get index sets for faces, edges and vertices from graph */
1758   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1759   /* free unneeded index sets */
1760   if (!pcbddc->use_vertices) {
1761     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1762   }
1763   if (!pcbddc->use_edges) {
1764     for (i=0;i<n_ISForEdges;i++) {
1765       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1766     }
1767     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1768     n_ISForEdges = 0;
1769   }
1770   if (!pcbddc->use_faces) {
1771     for (i=0;i<n_ISForFaces;i++) {
1772       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1773     }
1774     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1775     n_ISForFaces = 0;
1776   }
1777   /* HACKS (the following two blocks of code) */
1778   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1779     pcbddc->use_change_of_basis = PETSC_TRUE;
1780     if (!ISForEdges) {
1781       pcbddc->use_change_on_faces = PETSC_TRUE;
1782     }
1783   }
1784   if (pcbddc->NullSpace) {
1785     /* use_change_of_basis should be consistent among processors */
1786     PetscBool tbool[2],gbool[2];
1787     tbool [0] = pcbddc->use_change_of_basis;
1788     tbool [1] = pcbddc->use_change_on_faces;
1789     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1790     pcbddc->use_change_of_basis = gbool[0];
1791     pcbddc->use_change_on_faces = gbool[1];
1792   }
1793   /* print some info */
1794   if (pcbddc->dbg_flag) {
1795     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1796     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1797     i = 0;
1798     if (ISForVertices) {
1799       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1800     }
1801     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1802     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1803     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1804     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1805   }
1806 
1807   if (!pcbddc->adaptive_selection) {
1808     /* check if near null space is attached to global mat */
1809     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1810     if (nearnullsp) {
1811       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1812       /* remove any stored info */
1813       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1814       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1815       /* store information for BDDC solver reuse */
1816       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1817       pcbddc->onearnullspace = nearnullsp;
1818       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1819       for (i=0;i<nnsp_size;i++) {
1820         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1821       }
1822     } else { /* if near null space is not provided BDDC uses constants by default */
1823       nnsp_size = 0;
1824       nnsp_has_cnst = PETSC_TRUE;
1825     }
1826     /* get max number of constraints on a single cc */
1827     max_constraints = nnsp_size;
1828     if (nnsp_has_cnst) max_constraints++;
1829 
1830     /*
1831          Evaluate maximum storage size needed by the procedure
1832          - temp_indices will contain start index of each constraint stored as follows
1833          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1834          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1835          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1836                                                                                                                                                            */
1837     total_counts = n_ISForFaces+n_ISForEdges;
1838     total_counts *= max_constraints;
1839     n_vertices = 0;
1840     if (ISForVertices) {
1841       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1842     }
1843     total_counts += n_vertices;
1844     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1845     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1846     total_counts = 0;
1847     max_size_of_constraint = 0;
1848     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1849       if (i<n_ISForEdges) {
1850         used_IS = &ISForEdges[i];
1851       } else {
1852         used_IS = &ISForFaces[i-n_ISForEdges];
1853       }
1854       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1855       total_counts += j;
1856       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1857     }
1858     total_counts *= max_constraints;
1859     total_counts += n_vertices;
1860     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1861     /* get local part of global near null space vectors */
1862     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1863     for (k=0;k<nnsp_size;k++) {
1864       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1865       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1866       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1867     }
1868 
1869     /* whether or not to skip lapack calls */
1870     skip_lapack = PETSC_TRUE;
1871     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1872 
1873     /* allocate some auxiliary stuff */
1874     if (!skip_lapack || pcbddc->use_qr_single) {
1875       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1876     }
1877 
1878     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1879     if (!skip_lapack) {
1880       PetscScalar temp_work;
1881 
1882 #if defined(PETSC_MISSING_LAPACK_GESVD)
1883       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1884       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1885       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1886       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1887 #if defined(PETSC_USE_COMPLEX)
1888       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1889 #endif
1890       /* now we evaluate the optimal workspace using query with lwork=-1 */
1891       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1892       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1893       lwork = -1;
1894       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1895 #if !defined(PETSC_USE_COMPLEX)
1896       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1897 #else
1898       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1899 #endif
1900       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1901       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1902 #else /* on missing GESVD */
1903       /* SVD */
1904       PetscInt max_n,min_n;
1905       max_n = max_size_of_constraint;
1906       min_n = max_constraints;
1907       if (max_size_of_constraint < max_constraints) {
1908         min_n = max_size_of_constraint;
1909         max_n = max_constraints;
1910       }
1911       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1912 #if defined(PETSC_USE_COMPLEX)
1913       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1914 #endif
1915       /* now we evaluate the optimal workspace using query with lwork=-1 */
1916       lwork = -1;
1917       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1918       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1919       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1920       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1921 #if !defined(PETSC_USE_COMPLEX)
1922       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1923 #else
1924       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1925 #endif
1926       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1927       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1928 #endif /* on missing GESVD */
1929       /* Allocate optimal workspace */
1930       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1931       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1932     }
1933     /* Now we can loop on constraining sets */
1934     total_counts = 0;
1935     temp_indices[0] = 0;
1936     /* vertices */
1937     if (ISForVertices) {
1938       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1939       if (nnsp_has_cnst) { /* consider all vertices */
1940         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1941         for (i=0;i<n_vertices;i++) {
1942           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1943           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1944           total_counts++;
1945         }
1946       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1947         PetscBool used_vertex;
1948         for (i=0;i<n_vertices;i++) {
1949           used_vertex = PETSC_FALSE;
1950           k = 0;
1951           while (!used_vertex && k<nnsp_size) {
1952             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1953             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1954               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1955               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1956               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1957               total_counts++;
1958               used_vertex = PETSC_TRUE;
1959             }
1960             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1961             k++;
1962           }
1963         }
1964       }
1965       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1966       n_vertices = total_counts;
1967     }
1968 
1969     /* edges and faces */
1970     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1971       if (ncc<n_ISForEdges) {
1972         used_IS = &ISForEdges[ncc];
1973         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1974       } else {
1975         used_IS = &ISForFaces[ncc-n_ISForEdges];
1976         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1977       }
1978       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1979       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1980       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1981       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1982       /* change of basis should not be performed on local periodic nodes */
1983       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1984       if (nnsp_has_cnst) {
1985         PetscScalar quad_value;
1986         temp_constraints++;
1987         if (!pcbddc->use_nnsp_true) {
1988           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1989         } else {
1990           quad_value = 1.0;
1991         }
1992         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1993         for (j=0;j<size_of_constraint;j++) {
1994           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1995         }
1996         /* sort by global ordering if using lapack subroutines (not needed!) */
1997         if (!skip_lapack || pcbddc->use_qr_single) {
1998           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1999           for (j=0;j<size_of_constraint;j++) {
2000             permutation[j]=j;
2001           }
2002           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2003           for (j=0;j<size_of_constraint;j++) {
2004             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2005           }
2006           for (j=0;j<size_of_constraint;j++) {
2007             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2008             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2009           }
2010           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2011           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2012         }
2013         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2014         total_counts++;
2015       }
2016       for (k=0;k<nnsp_size;k++) {
2017         PetscReal real_value;
2018         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2019         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2020         for (j=0;j<size_of_constraint;j++) {
2021           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2022         }
2023         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2024         /* check if array is null on the connected component */
2025         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2026         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2027         if (real_value > 0.0) { /* keep indices and values */
2028           /* sort by global ordering if using lapack subroutines */
2029           if (!skip_lapack || pcbddc->use_qr_single) {
2030             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2031             for (j=0;j<size_of_constraint;j++) {
2032               permutation[j]=j;
2033             }
2034             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2035             for (j=0;j<size_of_constraint;j++) {
2036               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2037               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2038             }
2039             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2040             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2041           }
2042           temp_constraints++;
2043           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2044           total_counts++;
2045         }
2046       }
2047       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2048       valid_constraints = temp_constraints;
2049       if (!pcbddc->use_nnsp_true && temp_constraints) {
2050         if (temp_constraints == 1) { /* just normalize the constraint */
2051           PetscScalar norm;
2052           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2053           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2054           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2055           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2056         } else { /* perform SVD */
2057           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2058 
2059 #if defined(PETSC_MISSING_LAPACK_GESVD)
2060           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2061              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2062              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2063                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2064                 from that computed using LAPACKgesvd
2065              -> This is due to a different computation of eigenvectors in LAPACKheev
2066              -> The quality of the POD-computed basis will be the same */
2067           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2068           /* Store upper triangular part of correlation matrix */
2069           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2070           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2071           for (j=0;j<temp_constraints;j++) {
2072             for (k=0;k<j+1;k++) {
2073               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
2074             }
2075           }
2076           /* compute eigenvalues and eigenvectors of correlation matrix */
2077           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2078           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2079 #if !defined(PETSC_USE_COMPLEX)
2080           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2081 #else
2082           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2083 #endif
2084           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2085           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2086           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2087           j = 0;
2088           while (j < temp_constraints && singular_vals[j] < tol) j++;
2089           total_counts = total_counts-j;
2090           valid_constraints = temp_constraints-j;
2091           /* scale and copy POD basis into used quadrature memory */
2092           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2093           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2094           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2095           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2096           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2097           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2098           if (j<temp_constraints) {
2099             PetscInt ii;
2100             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2101             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2102             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2103             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2104             for (k=0;k<temp_constraints-j;k++) {
2105               for (ii=0;ii<size_of_constraint;ii++) {
2106                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2107               }
2108             }
2109           }
2110 #else  /* on missing GESVD */
2111           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2112           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2113           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2114           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2115 #if !defined(PETSC_USE_COMPLEX)
2116           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
2117 #else
2118           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
2119 #endif
2120           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2121           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2122           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2123           k = temp_constraints;
2124           if (k > size_of_constraint) k = size_of_constraint;
2125           j = 0;
2126           while (j < k && singular_vals[k-j-1] < tol) j++;
2127           valid_constraints = k-j;
2128           total_counts = total_counts-temp_constraints+valid_constraints;
2129 #endif /* on missing GESVD */
2130         }
2131       }
2132       /* setting change_of_basis flag is safe now */
2133       if (boolforchange) {
2134         for (j=0;j<valid_constraints;j++) {
2135           PetscBTSet(change_basis,total_counts-j-1);
2136         }
2137       }
2138     }
2139     /* free workspace */
2140     if (!skip_lapack || pcbddc->use_qr_single) {
2141       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2142     }
2143     if (!skip_lapack) {
2144       ierr = PetscFree(work);CHKERRQ(ierr);
2145 #if defined(PETSC_USE_COMPLEX)
2146       ierr = PetscFree(rwork);CHKERRQ(ierr);
2147 #endif
2148       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2149 #if defined(PETSC_MISSING_LAPACK_GESVD)
2150       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2151       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2152 #endif
2153     }
2154     for (k=0;k<nnsp_size;k++) {
2155       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2156     }
2157     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2158   } else {
2159     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2160     PetscInt        cum = 0;
2161 
2162     total_counts = 0;
2163     n_vertices = 0;
2164     if (sub_schurs->is_Ej_com) {
2165       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2166     }
2167     max_constraints = 0;
2168     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2169       total_counts += pcbddc->adaptive_constraints_n[i];
2170       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2171     }
2172     temp_indices = pcbddc->adaptive_constraints_ptrs;
2173     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2174     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2175 
2176 #if 0
2177     printf("Found %d totals\n",total_counts);
2178     for (i=0;i<total_counts;i++) {
2179       printf("const %d, start %d",i,temp_indices[i]);
2180       printf(" end %d:\n",temp_indices[i+1]);
2181       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2182         printf("  idxs %d",temp_indices_to_constraint[j]);
2183         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2184       }
2185     }
2186     for (i=0;i<n_vertices;i++) {
2187       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2188     }
2189     for (i=0;i<sub_schurs->n_subs;i++) {
2190       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2191     }
2192 #endif
2193 
2194     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2195     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2196     /* Change of basis */
2197     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2198     if (pcbddc->use_change_of_basis) {
2199       cum = n_vertices;
2200       for (i=0;i<sub_schurs->n_subs;i++) {
2201         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2202           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2203             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2204           }
2205         }
2206         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2207       }
2208     }
2209   }
2210 
2211   /* free index sets of faces, edges and vertices */
2212   for (i=0;i<n_ISForFaces;i++) {
2213     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2214   }
2215   if (n_ISForFaces) {
2216     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2217   }
2218   for (i=0;i<n_ISForEdges;i++) {
2219     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2220   }
2221   if (n_ISForEdges) {
2222     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2223   }
2224   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2225 
2226   /* map temp_indices_to_constraint in boundary numbering */
2227   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2228   if (i != temp_indices[total_counts]) {
2229     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2230   }
2231 
2232   /* set quantities in pcbddc data structure and store previous primal size */
2233   /* n_vertices defines the number of subdomain corners in the primal space */
2234   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2235   olocal_primal_size = pcbddc->local_primal_size;
2236   pcbddc->local_primal_size = total_counts;
2237   pcbddc->n_vertices = n_vertices;
2238   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2239 
2240   /* Create constraint matrix */
2241   /* The constraint matrix is used to compute the l2g map of primal dofs */
2242   /* so we need to set it up properly either with or without change of basis */
2243   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2244   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2245   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2246   /* array to compute a local numbering of constraints : vertices first then constraints */
2247   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2248   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2249   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2250   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2251   /* auxiliary stuff for basis change */
2252   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2253   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2254 
2255   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2256   total_primal_vertices=0;
2257   for (i=0;i<pcbddc->local_primal_size;i++) {
2258     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2259     if (size_of_constraint == 1) {
2260       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2261       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2262       aux_primal_minloc[total_primal_vertices]=0;
2263       total_primal_vertices++;
2264     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2265       PetscInt min_loc,min_index;
2266       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2267       /* find first untouched local node */
2268       k = 0;
2269       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2270       min_index = global_indices[k];
2271       min_loc = k;
2272       /* search the minimum among global nodes already untouched on the cc */
2273       for (k=1;k<size_of_constraint;k++) {
2274         /* there can be more than one constraint on a single connected component */
2275         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2276           min_index = global_indices[k];
2277           min_loc = k;
2278         }
2279       }
2280       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2281       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2282       aux_primal_minloc[total_primal_vertices]=min_loc;
2283       total_primal_vertices++;
2284     }
2285   }
2286   /* determine if a QR strategy is needed for change of basis */
2287   qr_needed = PETSC_FALSE;
2288   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2289   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2290     if (PetscBTLookup(change_basis,i)) {
2291       if (!pcbddc->use_qr_single) {
2292         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2293         j = 0;
2294         for (k=0;k<size_of_constraint;k++) {
2295           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2296             j++;
2297           }
2298         }
2299         /* found more than one primal dof on the cc */
2300         if (j > 1) {
2301           PetscBTSet(qr_needed_idx,i);
2302           qr_needed = PETSC_TRUE;
2303         }
2304       } else {
2305         PetscBTSet(qr_needed_idx,i);
2306         qr_needed = PETSC_TRUE;
2307       }
2308     }
2309   }
2310   /* free workspace */
2311   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2312 
2313   /* permute indices in order to have a sorted set of vertices */
2314   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2315 
2316   /* nonzero structure of constraint matrix */
2317   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2318   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2319   j=total_primal_vertices;
2320   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2321     if (!PetscBTLookup(change_basis,i)) {
2322       nnz[j]=temp_indices[i+1]-temp_indices[i];
2323       j++;
2324     }
2325   }
2326   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2327   ierr = PetscFree(nnz);CHKERRQ(ierr);
2328   /* set values in constraint matrix */
2329   for (i=0;i<total_primal_vertices;i++) {
2330     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2331   }
2332   total_counts = total_primal_vertices;
2333   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2334     if (!PetscBTLookup(change_basis,i)) {
2335       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2336       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2337       total_counts++;
2338     }
2339   }
2340   /* assembling */
2341   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2342   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2343   /*
2344   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2345   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2346   */
2347   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2348   if (pcbddc->use_change_of_basis) {
2349     /* dual and primal dofs on a single cc */
2350     PetscInt     dual_dofs,primal_dofs;
2351     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2352     PetscInt     primal_counter;
2353     /* working stuff for GEQRF */
2354     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2355     PetscBLASInt lqr_work;
2356     /* working stuff for UNGQR */
2357     PetscScalar  *gqr_work,lgqr_work_t;
2358     PetscBLASInt lgqr_work;
2359     /* working stuff for TRTRS */
2360     PetscScalar  *trs_rhs;
2361     PetscBLASInt Blas_NRHS;
2362     /* pointers for values insertion into change of basis matrix */
2363     PetscInt     *start_rows,*start_cols;
2364     PetscScalar  *start_vals;
2365     /* working stuff for values insertion */
2366     PetscBT      is_primal;
2367     /* matrix sizes */
2368     PetscInt     global_size,local_size;
2369     /* temporary change of basis */
2370     Mat          localChangeOfBasisMatrix;
2371 
2372     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2373     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2374     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2375     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2376     /* nonzeros for local mat */
2377     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2378     for (i=0;i<pcis->n;i++) nnz[i]=1;
2379     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2380       if (PetscBTLookup(change_basis,i)) {
2381         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2382         if (PetscBTLookup(qr_needed_idx,i)) {
2383           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2384         } else {
2385           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2386           /* get local primal index on the cc */
2387           j = 0;
2388           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2389           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2390         }
2391       }
2392     }
2393     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2394     ierr = PetscFree(nnz);CHKERRQ(ierr);
2395     /* Set initial identity in the matrix */
2396     for (i=0;i<pcis->n;i++) {
2397       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2398     }
2399 
2400     if (pcbddc->dbg_flag) {
2401       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2402       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2403     }
2404 
2405 
2406     /* Now we loop on the constraints which need a change of basis */
2407     /*
2408        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2409        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2410 
2411        Basic blocks of change of basis matrix T computed by
2412 
2413           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2414 
2415             | 1        0   ...        0         s_1/S |
2416             | 0        1   ...        0         s_2/S |
2417             |              ...                        |
2418             | 0        ...            1     s_{n-1}/S |
2419             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2420 
2421             with S = \sum_{i=1}^n s_i^2
2422             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2423                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2424 
2425           - QR decomposition of constraints otherwise
2426     */
2427     if (qr_needed) {
2428       /* space to store Q */
2429       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2430       /* first we issue queries for optimal work */
2431       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2432       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2433       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2434       lqr_work = -1;
2435       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2436       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2437       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2438       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2439       lgqr_work = -1;
2440       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2441       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2442       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2443       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2444       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2445       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2446       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2447       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2448       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2449       /* array to store scaling factors for reflectors */
2450       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2451       /* array to store rhs and solution of triangular solver */
2452       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2453       /* allocating workspace for check */
2454       if (pcbddc->dbg_flag) {
2455         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2456       }
2457     }
2458     /* array to store whether a node is primal or not */
2459     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2460     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2461     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2462     if (i != total_primal_vertices) {
2463       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2464     }
2465     for (i=0;i<total_primal_vertices;i++) {
2466       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2467     }
2468     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2469 
2470     /* loop on constraints and see whether or not they need a change of basis and compute it */
2471     /* -> using implicit ordering contained in temp_indices data */
2472     total_counts = pcbddc->n_vertices;
2473     primal_counter = total_counts;
2474     while (total_counts<pcbddc->local_primal_size) {
2475       primal_dofs = 1;
2476       if (PetscBTLookup(change_basis,total_counts)) {
2477         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2478         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2479           primal_dofs++;
2480         }
2481         /* get constraint info */
2482         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2483         dual_dofs = size_of_constraint-primal_dofs;
2484 
2485         if (pcbddc->dbg_flag) {
2486           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2487         }
2488 
2489         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2490 
2491           /* copy quadrature constraints for change of basis check */
2492           if (pcbddc->dbg_flag) {
2493             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2494           }
2495           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2496           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2497 
2498           /* compute QR decomposition of constraints */
2499           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2500           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2501           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2502           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2503           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2504           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2505           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2506 
2507           /* explictly compute R^-T */
2508           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2509           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2510           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2511           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2512           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2513           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2514           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2515           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2516           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2517           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2518 
2519           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2520           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2521           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2522           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2523           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2524           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2525           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2526           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2527           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2528 
2529           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2530              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2531              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2532           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2533           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2534           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2535           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2536           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2537           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2538           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2539           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2540           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2541           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2542 
2543           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2544           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2545           /* insert cols for primal dofs */
2546           for (j=0;j<primal_dofs;j++) {
2547             start_vals = &qr_basis[j*size_of_constraint];
2548             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2549             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2550           }
2551           /* insert cols for dual dofs */
2552           for (j=0,k=0;j<dual_dofs;k++) {
2553             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2554               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2555               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2556               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2557               j++;
2558             }
2559           }
2560 
2561           /* check change of basis */
2562           if (pcbddc->dbg_flag) {
2563             PetscInt   ii,jj;
2564             PetscBool valid_qr=PETSC_TRUE;
2565             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2566             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2567             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2568             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2569             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2570             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2571             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2572             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2573             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2574             for (jj=0;jj<size_of_constraint;jj++) {
2575               for (ii=0;ii<primal_dofs;ii++) {
2576                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2577                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2578               }
2579             }
2580             if (!valid_qr) {
2581               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2582               for (jj=0;jj<size_of_constraint;jj++) {
2583                 for (ii=0;ii<primal_dofs;ii++) {
2584                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2585                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2586                   }
2587                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2588                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2589                   }
2590                 }
2591               }
2592             } else {
2593               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2594             }
2595           }
2596         } else { /* simple transformation block */
2597           PetscInt    row,col;
2598           PetscScalar val,norm;
2599 
2600           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2601           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2602           for (j=0;j<size_of_constraint;j++) {
2603             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2604             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2605             if (!PetscBTLookup(is_primal,row_B)) {
2606               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2607               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2608               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2609             } else {
2610               for (k=0;k<size_of_constraint;k++) {
2611                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2612                 if (row != col) {
2613                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2614                 } else {
2615                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2616                 }
2617                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2618               }
2619             }
2620           }
2621           if (pcbddc->dbg_flag) {
2622             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2623           }
2624         }
2625         /* increment primal counter */
2626         primal_counter += primal_dofs;
2627       } else {
2628         if (pcbddc->dbg_flag) {
2629           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2630         }
2631       }
2632       /* increment constraint counter total_counts */
2633       total_counts += primal_dofs;
2634     }
2635 
2636     /* free workspace */
2637     if (qr_needed) {
2638       if (pcbddc->dbg_flag) {
2639         ierr = PetscFree(work);CHKERRQ(ierr);
2640       }
2641       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2642       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2643       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2644       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2645       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2646     }
2647     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2648     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2649     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650 
2651     /* assembling of global change of variable */
2652     {
2653       Mat      tmat;
2654       PetscInt bs;
2655 
2656       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2657       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2658       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2659       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2660       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2661       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2662       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2663       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2664       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2665       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2666       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2667       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2668       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2669       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2670       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2671       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2672       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2673       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2674     }
2675     /* check */
2676     if (pcbddc->dbg_flag) {
2677       PetscReal error;
2678       Vec       x,x_change;
2679 
2680       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2681       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2682       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2683       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2684       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2685       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2686       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2687       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2688       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2689       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2690       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2691       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2692       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2693       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2694       ierr = VecDestroy(&x);CHKERRQ(ierr);
2695       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2696     }
2697 
2698     /* adapt sub_schurs computed (if any) */
2699     if (pcbddc->use_deluxe_scaling) {
2700       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2701       if (sub_schurs->n_subs_par_g) {
2702         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2703       }
2704       if (sub_schurs->S_Ej_all) {
2705         Mat S_1,S_2,tmat;
2706         IS is_all_N;
2707 
2708         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2709         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2710         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2711         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2712         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2713         sub_schurs->S_Ej_all = S_1;
2714         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2715         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2716         sub_schurs->sum_S_Ej_all = S_2;
2717         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2718       }
2719     }
2720     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2721   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2722     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2723     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2724   }
2725 
2726   /* set up change of basis context */
2727   if (pcbddc->ChangeOfBasisMatrix) {
2728     PCBDDCChange_ctx change_ctx;
2729 
2730     if (!pcbddc->new_global_mat) {
2731       PetscInt global_size,local_size;
2732 
2733       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2734       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2735       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2736       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2737       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2738       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2739       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2740       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2741       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2742     } else {
2743       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2744       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2745       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2746     }
2747     if (!pcbddc->user_ChangeOfBasisMatrix) {
2748       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2749       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2750     } else {
2751       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2752       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2753     }
2754     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2755     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2756     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2757     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2758   }
2759 
2760   /* get indices in local ordering for vertices and constraints */
2761   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2762     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2763     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2764   }
2765   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2766   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2767   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2768   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2769   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2770   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2771   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2772   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2773   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2774   /* set quantities in PCBDDC data struct */
2775   pcbddc->n_actual_vertices = i;
2776   /* check if a new primal space has been introduced */
2777   pcbddc->new_primal_space_local = PETSC_TRUE;
2778   if (olocal_primal_size == pcbddc->local_primal_size) {
2779     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2780     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2781     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2782   }
2783   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2784   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2785 
2786   /* flush dbg viewer */
2787   if (pcbddc->dbg_flag) {
2788     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2789   }
2790 
2791   /* free workspace */
2792   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2793   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2794   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2795   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2796   if (!pcbddc->adaptive_selection) {
2797     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2798     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2799   } else {
2800     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2801                       pcbddc->adaptive_constraints_ptrs,
2802                       pcbddc->adaptive_constraints_idxs,
2803                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2804     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2805   }
2806   PetscFunctionReturn(0);
2807 }
2808 
2809 #undef __FUNCT__
2810 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2811 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2812 {
2813   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2814   PC_IS       *pcis = (PC_IS*)pc->data;
2815   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2816   PetscInt    ierr,i,vertex_size;
2817   PetscViewer viewer=pcbddc->dbg_viewer;
2818 
2819   PetscFunctionBegin;
2820   /* Reset previously computed graph */
2821   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2822   /* Init local Graph struct */
2823   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2824 
2825   /* Check validity of the csr graph passed in by the user */
2826   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2827     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2828   }
2829 
2830   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2831   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2832     Mat       mat_adj;
2833     PetscInt  *xadj,*adjncy;
2834     PetscInt  nvtxs;
2835     PetscBool flg_row=PETSC_TRUE;
2836 
2837     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2838     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2839     if (!flg_row) {
2840       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2841     }
2842     if (pcbddc->use_local_adj) {
2843       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2844       pcbddc->computed_rowadj = PETSC_TRUE;
2845     } else { /* just compute subdomain's connected components */
2846       IS                     is_dummy;
2847       ISLocalToGlobalMapping l2gmap_dummy;
2848       PetscInt               j,sum;
2849       PetscInt               *cxadj,*cadjncy;
2850       const PetscInt         *idxs;
2851       PCBDDCGraph            graph;
2852       PetscBT                is_on_boundary;
2853 
2854       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2855       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2856       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2857       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2858       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2859       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2860       graph->xadj = xadj;
2861       graph->adjncy = adjncy;
2862       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2863       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2864 
2865       if (pcbddc->dbg_flag) {
2866         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2867         for (i=0;i<graph->ncc;i++) {
2868           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2869         }
2870         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2871       }
2872 
2873       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2874       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2875       for (i=0;i<pcis->n_B;i++) {
2876         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2877       }
2878       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2879 
2880       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2881       sum = 0;
2882       for (i=0;i<graph->ncc;i++) {
2883         PetscInt sizecc = 0;
2884         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2885           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2886             sizecc++;
2887           }
2888         }
2889         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2890           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2891             cxadj[graph->queue[j]] = sizecc;
2892           }
2893         }
2894         sum += sizecc*sizecc;
2895       }
2896       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2897       sum = 0;
2898       for (i=0;i<nvtxs;i++) {
2899         PetscInt temp = cxadj[i];
2900         cxadj[i] = sum;
2901         sum += temp;
2902       }
2903       cxadj[nvtxs] = sum;
2904       for (i=0;i<graph->ncc;i++) {
2905         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2906           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2907             PetscInt k,sizecc = 0;
2908             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2909               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2910                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2911                 sizecc++;
2912               }
2913             }
2914           }
2915         }
2916       }
2917       if (nvtxs) {
2918         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2919       } else {
2920         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2921         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2922       }
2923       graph->xadj = 0;
2924       graph->adjncy = 0;
2925       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2926       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2927     }
2928     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2929     if (!flg_row) {
2930       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2931     }
2932     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2933   }
2934 
2935   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2936   vertex_size = 1;
2937   if (pcbddc->user_provided_isfordofs) {
2938     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2939       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2940       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2941         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2942         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2943       }
2944       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2945       pcbddc->n_ISForDofs = 0;
2946       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2947     }
2948     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2949     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2950   } else {
2951     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2952       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2953       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2954       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2955         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2956       }
2957     }
2958   }
2959 
2960   /* Setup of Graph */
2961   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2962     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2963   }
2964   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2965     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2966   }
2967   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2968 
2969   /* Graph's connected components analysis */
2970   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2971 
2972   /* print some info to stdout */
2973   if (pcbddc->dbg_flag) {
2974     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2975   }
2976 
2977   /* mark topography has done */
2978   pcbddc->recompute_topography = PETSC_FALSE;
2979   PetscFunctionReturn(0);
2980 }
2981 
2982 #undef __FUNCT__
2983 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2984 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2985 {
2986   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2987   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2988   PetscErrorCode ierr;
2989 
2990   PetscFunctionBegin;
2991   n = 0;
2992   vertices = 0;
2993   if (pcbddc->ConstraintMatrix) {
2994     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2995     for (i=0;i<local_primal_size;i++) {
2996       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2997       if (size_of_constraint == 1) n++;
2998       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2999     }
3000     if (vertices_idx) {
3001       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3002       n = 0;
3003       for (i=0;i<local_primal_size;i++) {
3004         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3005         if (size_of_constraint == 1) {
3006           vertices[n++]=row_cmat_indices[0];
3007         }
3008         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3009       }
3010     }
3011   }
3012   *n_vertices = n;
3013   if (vertices_idx) *vertices_idx = vertices;
3014   PetscFunctionReturn(0);
3015 }
3016 
3017 #undef __FUNCT__
3018 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3019 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3020 {
3021   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3022   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3023   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3024   PetscBT        touched;
3025   PetscErrorCode ierr;
3026 
3027     /* This function assumes that the number of local constraints per connected component
3028        is not greater than the number of nodes defined for the connected component
3029        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3030   PetscFunctionBegin;
3031   n = 0;
3032   constraints_index = 0;
3033   if (pcbddc->ConstraintMatrix) {
3034     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3035     max_size_of_constraint = 0;
3036     for (i=0;i<local_primal_size;i++) {
3037       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3038       if (size_of_constraint > 1) {
3039         n++;
3040       }
3041       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3042       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3043     }
3044     if (constraints_idx) {
3045       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3046       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3047       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3048       n = 0;
3049       for (i=0;i<local_primal_size;i++) {
3050         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3051         if (size_of_constraint > 1) {
3052           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3053           /* find first untouched local node */
3054           j = 0;
3055           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3056           min_index = row_cmat_global_indices[j];
3057           min_loc = j;
3058           /* search the minimum among nodes not yet touched on the connected component
3059              since there can be more than one constraint on a single cc */
3060           for (j=1;j<size_of_constraint;j++) {
3061             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3062               min_index = row_cmat_global_indices[j];
3063               min_loc = j;
3064             }
3065           }
3066           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3067           constraints_index[n++] = row_cmat_indices[min_loc];
3068         }
3069         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3070       }
3071       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3072       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3073     }
3074   }
3075   *n_constraints = n;
3076   if (constraints_idx) *constraints_idx = constraints_index;
3077   PetscFunctionReturn(0);
3078 }
3079 
3080 #undef __FUNCT__
3081 #define __FUNCT__ "PCBDDCSubsetNumbering"
3082 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3083 {
3084   Vec            local_vec,global_vec;
3085   IS             seqis,paris;
3086   VecScatter     scatter_ctx;
3087   PetscScalar    *array;
3088   PetscInt       *temp_global_dofs;
3089   PetscScalar    globalsum;
3090   PetscInt       i,j,s;
3091   PetscInt       nlocals,first_index,old_index,max_local;
3092   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3093   PetscMPIInt    *dof_sizes,*dof_displs;
3094   PetscBool      first_found;
3095   PetscErrorCode ierr;
3096 
3097   PetscFunctionBegin;
3098   /* mpi buffers */
3099   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3100   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3101   j = ( !rank_prec_comm ? size_prec_comm : 0);
3102   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3103   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3104   /* get maximum size of subset */
3105   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3106   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3107   max_local = 0;
3108   for (i=0;i<n_local_dofs;i++) {
3109     if (max_local < temp_global_dofs[i] ) {
3110       max_local = temp_global_dofs[i];
3111     }
3112   }
3113   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3114   max_global++;
3115   max_local = 0;
3116   for (i=0;i<n_local_dofs;i++) {
3117     if (max_local < local_dofs[i] ) {
3118       max_local = local_dofs[i];
3119     }
3120   }
3121   max_local++;
3122   /* allocate workspace */
3123   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3124   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3125   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3126   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3127   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3128   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3129   /* create scatter */
3130   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3131   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3132   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3133   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3134   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3135   /* init array */
3136   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3137   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3138   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3139   if (local_dofs_mult) {
3140     for (i=0;i<n_local_dofs;i++) {
3141       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3142     }
3143   } else {
3144     for (i=0;i<n_local_dofs;i++) {
3145       array[local_dofs[i]]=1.0;
3146     }
3147   }
3148   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3149   /* scatter into global vec and get total number of global dofs */
3150   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3151   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3152   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3153   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3154   /* Fill global_vec with cumulative function for global numbering */
3155   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3156   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3157   nlocals = 0;
3158   first_index = -1;
3159   first_found = PETSC_FALSE;
3160   for (i=0;i<s;i++) {
3161     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3162       first_found = PETSC_TRUE;
3163       first_index = i;
3164     }
3165     nlocals += (PetscInt)PetscRealPart(array[i]);
3166   }
3167   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3168   if (!rank_prec_comm) {
3169     dof_displs[0]=0;
3170     for (i=1;i<size_prec_comm;i++) {
3171       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3172     }
3173   }
3174   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3175   if (first_found) {
3176     array[first_index] += (PetscScalar)nlocals;
3177     old_index = first_index;
3178     for (i=first_index+1;i<s;i++) {
3179       if (PetscRealPart(array[i]) > 0.1) {
3180         array[i] += array[old_index];
3181         old_index = i;
3182       }
3183     }
3184   }
3185   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3186   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3187   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3188   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3189   /* get global ordering of local dofs */
3190   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3191   if (local_dofs_mult) {
3192     for (i=0;i<n_local_dofs;i++) {
3193       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3194     }
3195   } else {
3196     for (i=0;i<n_local_dofs;i++) {
3197       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3198     }
3199   }
3200   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3201   /* free workspace */
3202   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3203   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3204   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3205   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3206   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3207   /* return pointer to global ordering of local dofs */
3208   *global_numbering_subset = temp_global_dofs;
3209   PetscFunctionReturn(0);
3210 }
3211 
3212 #undef __FUNCT__
3213 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3214 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3215 {
3216   PetscInt       i,j;
3217   PetscScalar    *alphas;
3218   PetscErrorCode ierr;
3219 
3220   PetscFunctionBegin;
3221   /* this implements stabilized Gram-Schmidt */
3222   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3223   for (i=0;i<n;i++) {
3224     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3225     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3226     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3227   }
3228   ierr = PetscFree(alphas);CHKERRQ(ierr);
3229   PetscFunctionReturn(0);
3230 }
3231 
3232 #undef __FUNCT__
3233 #define __FUNCT__ "MatISGetSubassemblingPattern"
3234 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3235 {
3236   Mat             subdomain_adj;
3237   IS              new_ranks,ranks_send_to;
3238   MatPartitioning partitioner;
3239   Mat_IS          *matis;
3240   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3241   PetscInt        prank;
3242   PetscMPIInt     size,rank,color;
3243   PetscInt        *xadj,*adjncy,*oldranks;
3244   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3245   PetscInt        i,local_size,threshold=0;
3246   PetscErrorCode  ierr;
3247   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3248   PetscSubcomm    subcomm;
3249 
3250   PetscFunctionBegin;
3251   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3252   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3253   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3254 
3255   /* Get info on mapping */
3256   matis = (Mat_IS*)(mat->data);
3257   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3258   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3259 
3260   /* build local CSR graph of subdomains' connectivity */
3261   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3262   xadj[0] = 0;
3263   xadj[1] = PetscMax(n_neighs-1,0);
3264   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3265   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3266 
3267   if (threshold) {
3268     PetscInt xadj_count = 0;
3269     for (i=1;i<n_neighs;i++) {
3270       if (n_shared[i] > threshold) {
3271         adjncy[xadj_count] = neighs[i];
3272         adjncy_wgt[xadj_count] = n_shared[i];
3273         xadj_count++;
3274       }
3275     }
3276     xadj[1] = xadj_count;
3277   } else {
3278     if (xadj[1]) {
3279       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3280       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3281     }
3282   }
3283   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3284   if (use_square) {
3285     for (i=0;i<xadj[1];i++) {
3286       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3287     }
3288   }
3289   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3290 
3291   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3292 
3293   /*
3294     Restrict work on active processes only.
3295   */
3296   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3297   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3298   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3299   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3300   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3301   if (color) {
3302     ierr = PetscFree(xadj);CHKERRQ(ierr);
3303     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3304     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3305   } else {
3306     PetscInt coarsening_ratio;
3307     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3308     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3309     prank = rank;
3310     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3311     /*
3312     for (i=0;i<size;i++) {
3313       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3314     }
3315     */
3316     for (i=0;i<xadj[1];i++) {
3317       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3318     }
3319     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3320     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3321     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3322 
3323     /* Partition */
3324     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3325     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3326     if (use_vwgt) {
3327       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3328       v_wgt[0] = local_size;
3329       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3330     }
3331     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3332     coarsening_ratio = size/n_subdomains;
3333     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3334     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3335     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3336     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3337 
3338     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3339     if (contiguous) {
3340       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3341     } else {
3342       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3343     }
3344     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3345     /* clean up */
3346     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3347     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3348     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3349     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3350   }
3351   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3352 
3353   /* assemble parallel IS for sends */
3354   i = 1;
3355   if (color) i=0;
3356   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3357 
3358   /* get back IS */
3359   *is_sends = ranks_send_to;
3360   PetscFunctionReturn(0);
3361 }
3362 
3363 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3364 
3365 #undef __FUNCT__
3366 #define __FUNCT__ "MatISSubassemble"
3367 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3368 {
3369   Mat                    local_mat;
3370   Mat_IS                 *matis;
3371   IS                     is_sends_internal;
3372   PetscInt               rows,cols,new_local_rows;
3373   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3374   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3375   ISLocalToGlobalMapping l2gmap;
3376   PetscInt*              l2gmap_indices;
3377   const PetscInt*        is_indices;
3378   MatType                new_local_type;
3379   /* buffers */
3380   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3381   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3382   PetscInt               *recv_buffer_idxs_local;
3383   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3384   /* MPI */
3385   MPI_Comm               comm,comm_n;
3386   PetscSubcomm           subcomm;
3387   PetscMPIInt            n_sends,n_recvs,commsize;
3388   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3389   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3390   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3391   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3392   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3393   PetscErrorCode         ierr;
3394 
3395   PetscFunctionBegin;
3396   /* TODO: add missing checks */
3397   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3398   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3399   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3400   PetscValidLogicalCollectiveInt(mat,nis,7);
3401   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3402   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3403   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3404   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3405   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3406   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3407   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3408   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3409     PetscInt mrows,mcols,mnrows,mncols;
3410     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3411     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3412     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3413     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3414     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3415     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3416   }
3417   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3418   PetscValidLogicalCollectiveInt(mat,bs,0);
3419   /* prepare IS for sending if not provided */
3420   if (!is_sends) {
3421     PetscBool pcontig = PETSC_TRUE;
3422     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3423     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3424   } else {
3425     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3426     is_sends_internal = is_sends;
3427   }
3428 
3429   /* get pointer of MATIS data */
3430   matis = (Mat_IS*)mat->data;
3431 
3432   /* get comm */
3433   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3434 
3435   /* compute number of sends */
3436   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3437   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3438 
3439   /* compute number of receives */
3440   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3441   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3442   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3443   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3444   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3445   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3446   ierr = PetscFree(iflags);CHKERRQ(ierr);
3447 
3448   /* restrict comm if requested */
3449   subcomm = 0;
3450   destroy_mat = PETSC_FALSE;
3451   if (restrict_comm) {
3452     PetscMPIInt color,subcommsize;
3453 
3454     color = 0;
3455     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3456     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3457     subcommsize = commsize - subcommsize;
3458     /* check if reuse has been requested */
3459     if (reuse == MAT_REUSE_MATRIX) {
3460       if (*mat_n) {
3461         PetscMPIInt subcommsize2;
3462         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3463         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3464         comm_n = PetscObjectComm((PetscObject)*mat_n);
3465       } else {
3466         comm_n = PETSC_COMM_SELF;
3467       }
3468     } else { /* MAT_INITIAL_MATRIX */
3469       PetscMPIInt rank;
3470 
3471       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3472       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3473       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3474       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3475       comm_n = subcomm->comm;
3476     }
3477     /* flag to destroy *mat_n if not significative */
3478     if (color) destroy_mat = PETSC_TRUE;
3479   } else {
3480     comm_n = comm;
3481   }
3482 
3483   /* prepare send/receive buffers */
3484   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3485   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3486   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3487   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3488   if (nis) {
3489     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3490   }
3491 
3492   /* Get data from local matrices */
3493   if (!isdense) {
3494     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3495     /* TODO: See below some guidelines on how to prepare the local buffers */
3496     /*
3497        send_buffer_vals should contain the raw values of the local matrix
3498        send_buffer_idxs should contain:
3499        - MatType_PRIVATE type
3500        - PetscInt        size_of_l2gmap
3501        - PetscInt        global_row_indices[size_of_l2gmap]
3502        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3503     */
3504   } else {
3505     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3506     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3507     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3508     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3509     send_buffer_idxs[1] = i;
3510     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3511     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3512     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3513     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3514     for (i=0;i<n_sends;i++) {
3515       ilengths_vals[is_indices[i]] = len*len;
3516       ilengths_idxs[is_indices[i]] = len+2;
3517     }
3518   }
3519   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3520   /* additional is (if any) */
3521   if (nis) {
3522     PetscMPIInt psum;
3523     PetscInt j;
3524     for (j=0,psum=0;j<nis;j++) {
3525       PetscInt plen;
3526       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3527       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3528       psum += len+1; /* indices + lenght */
3529     }
3530     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3531     for (j=0,psum=0;j<nis;j++) {
3532       PetscInt plen;
3533       const PetscInt *is_array_idxs;
3534       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3535       send_buffer_idxs_is[psum] = plen;
3536       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3537       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3538       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3539       psum += plen+1; /* indices + lenght */
3540     }
3541     for (i=0;i<n_sends;i++) {
3542       ilengths_idxs_is[is_indices[i]] = psum;
3543     }
3544     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3545   }
3546 
3547   buf_size_idxs = 0;
3548   buf_size_vals = 0;
3549   buf_size_idxs_is = 0;
3550   for (i=0;i<n_recvs;i++) {
3551     buf_size_idxs += (PetscInt)olengths_idxs[i];
3552     buf_size_vals += (PetscInt)olengths_vals[i];
3553     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3554   }
3555   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3556   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3557   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3558 
3559   /* get new tags for clean communications */
3560   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3561   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3562   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3563 
3564   /* allocate for requests */
3565   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3566   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3567   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3568   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3569   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3570   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3571 
3572   /* communications */
3573   ptr_idxs = recv_buffer_idxs;
3574   ptr_vals = recv_buffer_vals;
3575   ptr_idxs_is = recv_buffer_idxs_is;
3576   for (i=0;i<n_recvs;i++) {
3577     source_dest = onodes[i];
3578     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3579     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3580     ptr_idxs += olengths_idxs[i];
3581     ptr_vals += olengths_vals[i];
3582     if (nis) {
3583       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3584       ptr_idxs_is += olengths_idxs_is[i];
3585     }
3586   }
3587   for (i=0;i<n_sends;i++) {
3588     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3589     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3590     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3591     if (nis) {
3592       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3593     }
3594   }
3595   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3596   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3597 
3598   /* assemble new l2g map */
3599   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3600   ptr_idxs = recv_buffer_idxs;
3601   new_local_rows = 0;
3602   for (i=0;i<n_recvs;i++) {
3603     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3604     ptr_idxs += olengths_idxs[i];
3605   }
3606   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3607   ptr_idxs = recv_buffer_idxs;
3608   new_local_rows = 0;
3609   for (i=0;i<n_recvs;i++) {
3610     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3611     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3612     ptr_idxs += olengths_idxs[i];
3613   }
3614   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3615   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3616   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3617 
3618   /* infer new local matrix type from received local matrices type */
3619   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3620   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3621   if (n_recvs) {
3622     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3623     ptr_idxs = recv_buffer_idxs;
3624     for (i=0;i<n_recvs;i++) {
3625       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3626         new_local_type_private = MATAIJ_PRIVATE;
3627         break;
3628       }
3629       ptr_idxs += olengths_idxs[i];
3630     }
3631     switch (new_local_type_private) {
3632       case MATDENSE_PRIVATE:
3633         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3634           new_local_type = MATSEQAIJ;
3635           bs = 1;
3636         } else { /* if I receive only 1 dense matrix */
3637           new_local_type = MATSEQDENSE;
3638           bs = 1;
3639         }
3640         break;
3641       case MATAIJ_PRIVATE:
3642         new_local_type = MATSEQAIJ;
3643         bs = 1;
3644         break;
3645       case MATBAIJ_PRIVATE:
3646         new_local_type = MATSEQBAIJ;
3647         break;
3648       case MATSBAIJ_PRIVATE:
3649         new_local_type = MATSEQSBAIJ;
3650         break;
3651       default:
3652         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3653         break;
3654     }
3655   } else { /* by default, new_local_type is seqdense */
3656     new_local_type = MATSEQDENSE;
3657     bs = 1;
3658   }
3659 
3660   /* create MATIS object if needed */
3661   if (reuse == MAT_INITIAL_MATRIX) {
3662     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3663     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3664   } else {
3665     /* it also destroys the local matrices */
3666     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3667   }
3668   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3669   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3670 
3671   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3672 
3673   /* Global to local map of received indices */
3674   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3675   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3676   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3677 
3678   /* restore attributes -> type of incoming data and its size */
3679   buf_size_idxs = 0;
3680   for (i=0;i<n_recvs;i++) {
3681     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3682     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3683     buf_size_idxs += (PetscInt)olengths_idxs[i];
3684   }
3685   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3686 
3687   /* set preallocation */
3688   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3689   if (!newisdense) {
3690     PetscInt *new_local_nnz=0;
3691 
3692     ptr_vals = recv_buffer_vals;
3693     ptr_idxs = recv_buffer_idxs_local;
3694     if (n_recvs) {
3695       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3696     }
3697     for (i=0;i<n_recvs;i++) {
3698       PetscInt j;
3699       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3700         for (j=0;j<*(ptr_idxs+1);j++) {
3701           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3702         }
3703       } else {
3704         /* TODO */
3705       }
3706       ptr_idxs += olengths_idxs[i];
3707     }
3708     if (new_local_nnz) {
3709       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3710       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3711       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3712       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3713       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3714       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3715     } else {
3716       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3717     }
3718     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3719   } else {
3720     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3721   }
3722 
3723   /* set values */
3724   ptr_vals = recv_buffer_vals;
3725   ptr_idxs = recv_buffer_idxs_local;
3726   for (i=0;i<n_recvs;i++) {
3727     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3728       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3729       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3730       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3731       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3732       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3733     } else {
3734       /* TODO */
3735     }
3736     ptr_idxs += olengths_idxs[i];
3737     ptr_vals += olengths_vals[i];
3738   }
3739   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3740   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3741   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3742   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3743   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3744   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3745 
3746 #if 0
3747   if (!restrict_comm) { /* check */
3748     Vec       lvec,rvec;
3749     PetscReal infty_error;
3750 
3751     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3752     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3753     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3754     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3755     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3756     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3757     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3758     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3759     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3760   }
3761 #endif
3762 
3763   /* assemble new additional is (if any) */
3764   if (nis) {
3765     PetscInt **temp_idxs,*count_is,j,psum;
3766 
3767     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3768     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3769     ptr_idxs = recv_buffer_idxs_is;
3770     psum = 0;
3771     for (i=0;i<n_recvs;i++) {
3772       for (j=0;j<nis;j++) {
3773         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3774         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3775         psum += plen;
3776         ptr_idxs += plen+1; /* shift pointer to received data */
3777       }
3778     }
3779     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3780     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3781     for (i=1;i<nis;i++) {
3782       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3783     }
3784     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3785     ptr_idxs = recv_buffer_idxs_is;
3786     for (i=0;i<n_recvs;i++) {
3787       for (j=0;j<nis;j++) {
3788         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3789         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3790         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3791         ptr_idxs += plen+1; /* shift pointer to received data */
3792       }
3793     }
3794     for (i=0;i<nis;i++) {
3795       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3796       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3797       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3798     }
3799     ierr = PetscFree(count_is);CHKERRQ(ierr);
3800     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3801     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3802   }
3803   /* free workspace */
3804   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3805   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3806   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3807   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3808   if (isdense) {
3809     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3810     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3811   } else {
3812     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3813   }
3814   if (nis) {
3815     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3816     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3817   }
3818   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3819   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3820   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3821   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3822   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3823   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3824   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3825   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3826   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3827   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3828   ierr = PetscFree(onodes);CHKERRQ(ierr);
3829   if (nis) {
3830     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3831     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3832     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3833   }
3834   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3835   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3836     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3837     for (i=0;i<nis;i++) {
3838       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3839     }
3840   }
3841   PetscFunctionReturn(0);
3842 }
3843 
3844 /* temporary hack into ksp private data structure */
3845 #include <petsc-private/kspimpl.h>
3846 
3847 #undef __FUNCT__
3848 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3849 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3850 {
3851   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3852   PC_IS                  *pcis = (PC_IS*)pc->data;
3853   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3854   MatNullSpace           CoarseNullSpace=NULL;
3855   ISLocalToGlobalMapping coarse_islg;
3856   IS                     coarse_is,*isarray;
3857   PetscInt               i,im_active=-1,active_procs=-1;
3858   PetscInt               nis,nisdofs,nisneu;
3859   PC                     pc_temp;
3860   PCType                 coarse_pc_type;
3861   KSPType                coarse_ksp_type;
3862   PetscBool              multilevel_requested,multilevel_allowed;
3863   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3864   Mat                    t_coarse_mat_is;
3865   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3866   PetscMPIInt            all_procs;
3867   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3868   PetscBool              compute_vecs = PETSC_FALSE;
3869   PetscScalar            *array;
3870   PetscErrorCode         ierr;
3871 
3872   PetscFunctionBegin;
3873   /* Assign global numbering to coarse dofs */
3874   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3875     compute_vecs = PETSC_TRUE;
3876     PetscInt ocoarse_size;
3877     ocoarse_size = pcbddc->coarse_size;
3878     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3879     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3880     /* see if we can avoid some work */
3881     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3882       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3883         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3884         coarse_reuse = PETSC_FALSE;
3885       } else { /* we can safely reuse already computed coarse matrix */
3886         coarse_reuse = PETSC_TRUE;
3887       }
3888     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3889       coarse_reuse = PETSC_FALSE;
3890     }
3891     /* reset any subassembling information */
3892     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3893     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3894   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3895     coarse_reuse = PETSC_TRUE;
3896   }
3897 
3898   /* count "active" (i.e. with positive local size) and "void" processes */
3899   im_active = !!(pcis->n);
3900   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3901   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3902   void_procs = all_procs-active_procs;
3903   csin_type_simple = PETSC_TRUE;
3904   redist = PETSC_FALSE;
3905   if (pcbddc->current_level && void_procs) {
3906     csin_ml = PETSC_TRUE;
3907     ncoarse_ml = void_procs;
3908     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3909     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3910       csin_ds = PETSC_TRUE;
3911       ncoarse_ds = pcbddc->redistribute_coarse;
3912       redist = PETSC_TRUE;
3913     } else {
3914       csin_ds = PETSC_TRUE;
3915       ncoarse_ds = active_procs;
3916       redist = PETSC_TRUE;
3917     }
3918   } else {
3919     csin_ml = PETSC_FALSE;
3920     ncoarse_ml = all_procs;
3921     if (void_procs) {
3922       csin_ds = PETSC_TRUE;
3923       ncoarse_ds = void_procs;
3924       csin_type_simple = PETSC_FALSE;
3925     } else {
3926       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3927         csin_ds = PETSC_TRUE;
3928         ncoarse_ds = pcbddc->redistribute_coarse;
3929         redist = PETSC_TRUE;
3930       } else {
3931         csin_ds = PETSC_FALSE;
3932         ncoarse_ds = all_procs;
3933       }
3934     }
3935   }
3936 
3937   /*
3938     test if we can go multilevel: three conditions must be satisfied:
3939     - we have not exceeded the number of levels requested
3940     - we can actually subassemble the active processes
3941     - we can find a suitable number of MPI processes where we can place the subassembled problem
3942   */
3943   multilevel_allowed = PETSC_FALSE;
3944   multilevel_requested = PETSC_FALSE;
3945   if (pcbddc->current_level < pcbddc->max_levels) {
3946     multilevel_requested = PETSC_TRUE;
3947     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3948       multilevel_allowed = PETSC_FALSE;
3949     } else {
3950       multilevel_allowed = PETSC_TRUE;
3951     }
3952   }
3953   /* determine number of process partecipating to coarse solver */
3954   if (multilevel_allowed) {
3955     ncoarse = ncoarse_ml;
3956     csin = csin_ml;
3957   } else {
3958     ncoarse = ncoarse_ds;
3959     csin = csin_ds;
3960   }
3961 
3962   /* creates temporary l2gmap and IS for coarse indexes */
3963   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3964   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3965 
3966   /* creates temporary MATIS object for coarse matrix */
3967   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3968   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3969   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3970   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3971 #if 0
3972   {
3973     PetscViewer viewer;
3974     char filename[256];
3975     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3976     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3977     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3978     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3979     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3980   }
3981 #endif
3982   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3983   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3984   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3985   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3986   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3987 
3988   /* compute dofs splitting and neumann boundaries for coarse dofs */
3989   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3990     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3991     const PetscInt         *idxs;
3992     ISLocalToGlobalMapping tmap;
3993 
3994     /* create map between primal indices (in local representative ordering) and local primal numbering */
3995     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3996     /* allocate space for temporary storage */
3997     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3998     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3999     /* allocate for IS array */
4000     nisdofs = pcbddc->n_ISForDofsLocal;
4001     nisneu = !!pcbddc->NeumannBoundariesLocal;
4002     nis = nisdofs + nisneu;
4003     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4004     /* dofs splitting */
4005     for (i=0;i<nisdofs;i++) {
4006       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4007       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4008       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4009       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4010       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4011       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4012       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4013       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4014     }
4015     /* neumann boundaries */
4016     if (pcbddc->NeumannBoundariesLocal) {
4017       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4018       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4019       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4020       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4021       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4022       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4023       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4024       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4025     }
4026     /* free memory */
4027     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4028     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4029     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4030   } else {
4031     nis = 0;
4032     nisdofs = 0;
4033     nisneu = 0;
4034     isarray = NULL;
4035   }
4036   /* destroy no longer needed map */
4037   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4038 
4039   /* restrict on coarse candidates (if needed) */
4040   coarse_mat_is = NULL;
4041   if (csin) {
4042     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4043       if (redist) {
4044         PetscMPIInt rank;
4045         PetscInt    spc,n_spc_p1,dest[1],destsize;
4046 
4047         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4048         spc = active_procs/pcbddc->redistribute_coarse;
4049         n_spc_p1 = active_procs%pcbddc->redistribute_coarse;
4050         if (im_active) {
4051           destsize = 1;
4052           if (rank > n_spc_p1*(spc+1)-1) {
4053             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4054           } else {
4055             dest[0] = rank/(spc+1);
4056           }
4057         } else {
4058           destsize = 0;
4059         }
4060         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4061       } else if (csin_type_simple) {
4062         PetscMPIInt rank;
4063         PetscInt    issize,isidx;
4064 
4065         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4066         if (im_active) {
4067           issize = 1;
4068           isidx = (PetscInt)rank;
4069         } else {
4070           issize = 0;
4071           isidx = -1;
4072         }
4073         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4074       } else { /* get a suitable subassembling pattern from MATIS code */
4075         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4076       }
4077 
4078       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4079       if (!redist || ncoarse <= void_procs) {
4080         PetscInt ncoarse_cand,tissize,*nisindices;
4081         PetscInt *coarse_candidates;
4082         const PetscInt* tisindices;
4083 
4084         /* get coarse candidates' ranks in pc communicator */
4085         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4086         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4087         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4088           if (!coarse_candidates[i]) {
4089             coarse_candidates[ncoarse_cand++]=i;
4090           }
4091         }
4092         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4093 
4094 
4095         if (pcbddc->dbg_flag) {
4096           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4097           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4098           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4099           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4100           for (i=0;i<ncoarse_cand;i++) {
4101             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4102           }
4103           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4104           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4105         }
4106         /* shift the pattern on coarse candidates */
4107         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4108         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4109         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4110         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4111         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4112         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4113         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4114       }
4115       if (pcbddc->dbg_flag) {
4116         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4117         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4118         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4119         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4120       }
4121     }
4122     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4123     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4124   } else {
4125     if (pcbddc->dbg_flag) {
4126       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4127       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4128       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4129     }
4130     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4131     coarse_mat_is = t_coarse_mat_is;
4132   }
4133 
4134   /* create local to global scatters for coarse problem */
4135   if (compute_vecs) {
4136     PetscInt lrows;
4137     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4138     if (coarse_mat_is) {
4139       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4140     } else {
4141       lrows = 0;
4142     }
4143     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4144     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4145     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4146     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4147     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4148   }
4149   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4150   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4151 
4152   /* set defaults for coarse KSP and PC */
4153   if (multilevel_allowed) {
4154     coarse_ksp_type = KSPRICHARDSON;
4155     coarse_pc_type = PCBDDC;
4156   } else {
4157     coarse_ksp_type = KSPPREONLY;
4158     coarse_pc_type = PCREDUNDANT;
4159   }
4160 
4161   /* print some info if requested */
4162   if (pcbddc->dbg_flag) {
4163     if (!multilevel_allowed) {
4164       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4165       if (multilevel_requested) {
4166         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4167       } else if (pcbddc->max_levels) {
4168         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4169       }
4170       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4171     }
4172   }
4173 
4174   /* create the coarse KSP object only once with defaults */
4175   if (coarse_mat_is) {
4176     MatReuse coarse_mat_reuse;
4177     PetscViewer dbg_viewer = NULL;
4178     if (pcbddc->dbg_flag) {
4179       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4180       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4181     }
4182     if (!pcbddc->coarse_ksp) {
4183       char prefix[256],str_level[16];
4184       size_t len;
4185       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4186       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4187       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4188       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4189       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4190       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4191       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4192       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4193       /* prefix */
4194       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4195       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4196       if (!pcbddc->current_level) {
4197         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4198         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4199       } else {
4200         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4201         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4202         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4203         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4204         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4205         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4206       }
4207       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4208       /* allow user customization */
4209       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4210       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4211     }
4212 
4213     /* get some info after set from options */
4214     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4215     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4216     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4217     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4218     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4219       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4220       isbddc = PETSC_FALSE;
4221     }
4222     if (isredundant) {
4223       KSP inner_ksp;
4224       PC inner_pc;
4225       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4226       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4227       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4228     }
4229 
4230     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4231     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4232     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4233     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4234     if (nisdofs) {
4235       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4236       for (i=0;i<nisdofs;i++) {
4237         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4238       }
4239     }
4240     if (nisneu) {
4241       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4242       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4243     }
4244 
4245     /* assemble coarse matrix */
4246     if (coarse_reuse) {
4247       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4248       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4249       coarse_mat_reuse = MAT_REUSE_MATRIX;
4250     } else {
4251       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4252     }
4253     if (isbddc || isnn) {
4254       if (pcbddc->coarsening_ratio > 1) {
4255         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4256           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4257           if (pcbddc->dbg_flag) {
4258             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4259             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4260             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4261             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4262           }
4263         }
4264         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4265       } else {
4266         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4267         coarse_mat = coarse_mat_is;
4268       }
4269     } else {
4270       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4271     }
4272     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4273 
4274     /* propagate symmetry info to coarse matrix */
4275     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4276     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4277 
4278     /* set operators */
4279     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4280     if (pcbddc->dbg_flag) {
4281       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4282     }
4283   } else { /* processes non partecipating to coarse solver (if any) */
4284     coarse_mat = 0;
4285   }
4286   ierr = PetscFree(isarray);CHKERRQ(ierr);
4287 #if 0
4288   {
4289     PetscViewer viewer;
4290     char filename[256];
4291     sprintf(filename,"coarse_mat.m");
4292     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4293     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4294     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4295     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4296   }
4297 #endif
4298 
4299   /* Compute coarse null space (special handling by BDDC only) */
4300   if (pcbddc->NullSpace) {
4301     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4302   }
4303 
4304   if (pcbddc->coarse_ksp) {
4305     Vec crhs,csol;
4306     PetscBool ispreonly;
4307     if (CoarseNullSpace) {
4308       if (isbddc) {
4309         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4310       } else {
4311         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4312       }
4313     }
4314     /* setup coarse ksp */
4315     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4316     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4317     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4318     /* hack */
4319     if (!csol) {
4320       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4321     }
4322     if (!crhs) {
4323       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4324     }
4325     /* Check coarse problem if in debug mode or if solving with an iterative method */
4326     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4327     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4328       KSP       check_ksp;
4329       KSPType   check_ksp_type;
4330       PC        check_pc;
4331       Vec       check_vec,coarse_vec;
4332       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4333       PetscInt  its;
4334       PetscBool compute_eigs;
4335       PetscReal *eigs_r,*eigs_c;
4336       PetscInt  neigs;
4337       const char *prefix;
4338 
4339       /* Create ksp object suitable for estimation of extreme eigenvalues */
4340       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4341       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4342       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4343       if (ispreonly) {
4344         check_ksp_type = KSPPREONLY;
4345         compute_eigs = PETSC_FALSE;
4346       } else {
4347         check_ksp_type = KSPGMRES;
4348         compute_eigs = PETSC_TRUE;
4349       }
4350       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4351       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4352       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4353       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4354       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4355       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4356       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4357       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4358       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4359       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4360       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4361       /* create random vec */
4362       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4363       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4364       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4365       if (CoarseNullSpace) {
4366         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4367       }
4368       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4369       /* solve coarse problem */
4370       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4371       if (CoarseNullSpace) {
4372         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4373       }
4374       /* set eigenvalue estimation if preonly has not been requested */
4375       if (compute_eigs) {
4376         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4377         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4378         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4379         lambda_max = eigs_r[neigs-1];
4380         lambda_min = eigs_r[0];
4381         if (pcbddc->use_coarse_estimates) {
4382           if (lambda_max>lambda_min) {
4383             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4384             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4385           }
4386         }
4387       }
4388 
4389       /* check coarse problem residual error */
4390       if (pcbddc->dbg_flag) {
4391         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4392         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4393         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4394         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4395         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4396         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4397         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4398         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4399         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4400         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4401         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4402         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4403         if (compute_eigs) {
4404           PetscReal lambda_max_s,lambda_min_s;
4405           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4406           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4407           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4408           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4409           for (i=0;i<neigs;i++) {
4410             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4411           }
4412         }
4413         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4414         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4415       }
4416       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4417       if (compute_eigs) {
4418         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4419         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4420       }
4421     }
4422   }
4423   /* print additional info */
4424   if (pcbddc->dbg_flag) {
4425     /* waits until all processes reaches this point */
4426     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4427     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4428     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4429   }
4430 
4431   /* free memory */
4432   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4433   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4434   PetscFunctionReturn(0);
4435 }
4436 
4437 #undef __FUNCT__
4438 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4439 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4440 {
4441   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4442   PC_IS*         pcis = (PC_IS*)pc->data;
4443   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4444   PetscInt       i,coarse_size;
4445   PetscInt       *local_primal_indices;
4446   PetscErrorCode ierr;
4447 
4448   PetscFunctionBegin;
4449   /* Compute global number of coarse dofs */
4450   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4451     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4452   }
4453   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4454 
4455   /* check numbering */
4456   if (pcbddc->dbg_flag) {
4457     PetscScalar coarsesum,*array;
4458     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4459 
4460     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4461     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4462     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4463     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4464     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4465     for (i=0;i<pcbddc->local_primal_size;i++) {
4466       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4467     }
4468     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4469     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4470     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4471     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4472     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4473     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4474     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4475     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4476     for (i=0;i<pcis->n;i++) {
4477       if (array[i] == 1.0) {
4478         set_error = PETSC_TRUE;
4479         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4480       }
4481     }
4482     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4483     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4484     for (i=0;i<pcis->n;i++) {
4485       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4486     }
4487     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4488     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4489     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4490     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4491     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4492     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4493     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4494       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4495       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4496       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4497       for (i=0;i<pcbddc->local_primal_size;i++) {
4498         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4499       }
4500       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4501     }
4502     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4503     if (set_error_reduced) {
4504       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4505     }
4506   }
4507   /* get back data */
4508   *coarse_size_n = coarse_size;
4509   *local_primal_indices_n = local_primal_indices;
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 #undef __FUNCT__
4514 #define __FUNCT__ "PCBDDCGlobalToLocal"
4515 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4516 {
4517   IS             localis_t;
4518   PetscInt       i,lsize,*idxs,n;
4519   PetscScalar    *vals;
4520   PetscErrorCode ierr;
4521 
4522   PetscFunctionBegin;
4523   /* get indices in local ordering exploiting local to global map */
4524   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4525   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4526   for (i=0;i<lsize;i++) vals[i] = 1.0;
4527   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4528   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4529   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4530   if (idxs) { /* multilevel guard */
4531     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4532   }
4533   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4534   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4535   ierr = PetscFree(vals);CHKERRQ(ierr);
4536   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4537   /* now compute set in local ordering */
4538   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4539   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4540   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4541   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4542   for (i=0,lsize=0;i<n;i++) {
4543     if (PetscRealPart(vals[i]) > 0.5) {
4544       lsize++;
4545     }
4546   }
4547   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4548   for (i=0,lsize=0;i<n;i++) {
4549     if (PetscRealPart(vals[i]) > 0.5) {
4550       idxs[lsize++] = i;
4551     }
4552   }
4553   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4554   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4555   *localis = localis_t;
4556   PetscFunctionReturn(0);
4557 }
4558 
4559 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4560 #undef __FUNCT__
4561 #define __FUNCT__ "PCBDDCMatMult_Private"
4562 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4563 {
4564   PCBDDCChange_ctx change_ctx;
4565   PetscErrorCode   ierr;
4566 
4567   PetscFunctionBegin;
4568   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4569   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4570   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4571   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4572   PetscFunctionReturn(0);
4573 }
4574 
4575 #undef __FUNCT__
4576 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4577 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4578 {
4579   PCBDDCChange_ctx change_ctx;
4580   PetscErrorCode   ierr;
4581 
4582   PetscFunctionBegin;
4583   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4584   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4585   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4586   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4587   PetscFunctionReturn(0);
4588 }
4589 
4590 #undef __FUNCT__
4591 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4592 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4593 {
4594   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4595   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4596   PetscInt            *used_xadj,*used_adjncy;
4597   PetscBool           free_used_adj;
4598   PetscErrorCode      ierr;
4599 
4600   PetscFunctionBegin;
4601   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4602   free_used_adj = PETSC_FALSE;
4603   if (pcbddc->sub_schurs_layers == -1) {
4604     used_xadj = NULL;
4605     used_adjncy = NULL;
4606   } else {
4607     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4608       used_xadj = pcbddc->mat_graph->xadj;
4609       used_adjncy = pcbddc->mat_graph->adjncy;
4610     } else if (pcbddc->computed_rowadj) {
4611       used_xadj = pcbddc->mat_graph->xadj;
4612       used_adjncy = pcbddc->mat_graph->adjncy;
4613     } else {
4614       Mat            mat_adj;
4615       PetscBool      flg_row=PETSC_TRUE;
4616       const PetscInt *xadj,*adjncy;
4617       PetscInt       nvtxs;
4618 
4619       ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
4620       ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4621       if (!flg_row) {
4622         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
4623       }
4624       ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4625       ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4626       ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4627       ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4628       if (!flg_row) {
4629         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
4630       }
4631       ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
4632       free_used_adj = PETSC_TRUE;
4633     }
4634   }
4635   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4636 
4637   /* free adjacency */
4638   if (free_used_adj) {
4639     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4640   }
4641   PetscFunctionReturn(0);
4642 }
4643 
4644 #undef __FUNCT__
4645 #define __FUNCT__ "PCBDDCInitSubSchurs"
4646 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4647 {
4648   PC_IS               *pcis=(PC_IS*)pc->data;
4649   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4650   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4651   PCBDDCGraph         graph;
4652   Mat                 S_j;
4653   PetscErrorCode      ierr;
4654 
4655   PetscFunctionBegin;
4656   /* attach interface graph for determining subsets */
4657   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4658     IS verticesIS;
4659 
4660     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4661     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4662     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4663     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4664     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4665     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4666 /*
4667     if (pcbddc->dbg_flag) {
4668       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4669     }
4670 */
4671   } else {
4672     graph = pcbddc->mat_graph;
4673   }
4674 
4675   /* Create Schur complement matrix */
4676   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4677   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4678 
4679   /* sub_schurs init */
4680   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4681   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4682   /* free graph struct */
4683   if (pcbddc->sub_schurs_rebuild) {
4684     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4685   }
4686   PetscFunctionReturn(0);
4687 }
4688