xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 2fffb893659afe9d3c144518e55ce6069fda9aa2)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscBool       symmetric;
26   PetscErrorCode  ierr;
27 
28   PetscFunctionBegin;
29   if (!sub_schurs->use_mumps) {
30     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
31   }
32   ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr);
33   if (!symmetric) {
34     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
35   }
36 
37   /* max size of subsets */
38   mss = 0;
39   for (i=0;i<sub_schurs->n_subs;i++) {
40     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
41       PetscInt subset_size;
42       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
43       mss = PetscMax(mss,subset_size);
44     }
45   }
46 
47   /* min/max and threshold */
48   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
49   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
50   nmax = PetscMax(nmin,nmax);
51   if (pcbddc->adaptive_threshold > 1.0) {
52     thresh = 1.0/pcbddc->adaptive_threshold;
53   } else {
54     thresh = 1.0;
55   }
56 
57   /* allocate lapack workspace */
58   cum = cum2 = 0;
59   maxneigs = 0;
60   for (i=0;i<sub_schurs->n_subs;i++) {
61     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
62       PetscInt n,subset_size;
63       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
64       n = PetscMin(subset_size,nmax);
65       cum += subset_size*n;
66       cum2 += n;
67       maxneigs = PetscMax(maxneigs,n);
68     }
69   }
70 
71   if (mss) {
72     if (symmetric) {
73       PetscBLASInt B_itype = 1;
74       PetscBLASInt B_N = mss;
75       PetscScalar  zero = 0.0;
76       PetscScalar  eps = 0.0; /* dlamch? */
77 
78       B_lwork = -1;
79       S = NULL;
80       St = NULL;
81       eigs = NULL;
82       eigv = NULL;
83       B_iwork = NULL;
84       B_ifail = NULL;
85       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
86 #if defined(PETSC_USE_COMPLEX)
87       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
88 #else
89       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
90 #endif
91       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
92       ierr = PetscFPTrapPop();CHKERRQ(ierr);
93     } else {
94         /* TODO */
95     }
96   } else {
97     lwork = 0;
98   }
99 
100   nv = 0;
101   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
102     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
103   }
104   ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr);
105   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
106                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
107 #if defined(PETSC_USE_COMPLEX)
108   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
109 #endif
110   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
111   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
112                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
113                       nv+cum,&pcbddc->adaptive_constraints_idxs,
114                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
115   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
116 
117   maxneigs = 0;
118   cum = cum2 = cumarray = 0;
119   if (sub_schurs->is_Ej_com) {
120     const PetscInt *idxs;
121 
122     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
123     for (cum=0;cum<nv;cum++) {
124       pcbddc->adaptive_constraints_n[cum] = 1;
125       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
126       pcbddc->adaptive_constraints_ptrs[cum] = cum;
127       pcbddc->adaptive_constraints_data[cum] = 1.0;
128     }
129     cum2 = cum;
130     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
131   }
132 
133   if (mss) { /* multilevel */
134     if (pcbddc->use_deluxe_scaling) {
135       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
136     } else {
137       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
138     }
139     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
140   }
141 
142   for (i=0;i<sub_schurs->n_subs;i++) {
143     PetscInt j,subset_size;
144 
145     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
146     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
147       const PetscInt *idxs;
148       PetscScalar    one = 1.0,zero=0.0;
149       PetscBLASInt   B_N;
150 
151       /* S should be copied since we need it for deluxe scaling */
152       if (symmetric) {
153         PetscInt j;
154         for (j=0;j<subset_size;j++) {
155           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
156         }
157         for (j=0;j<subset_size;j++) {
158           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
159         }
160       } else {
161         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
162         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
163       }
164       /* is always this the right matrix? */
165       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
166 
167       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
168       /* St = Starray+cumarray; */
169 
170       if (symmetric) {
171         PetscBLASInt B_itype = 1;
172         PetscBLASInt B_IL = 1, B_IU;
173         PetscScalar  eps = -1.0; /* dlamch? */
174         PetscInt     nmin_s;
175 
176         /* ask for eigenvalues lower than thresh */
177         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
178         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
179         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
180 #if defined(PETSC_USE_COMPLEX)
181         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
182 #else
183         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
184 #endif
185         ierr = PetscFPTrapPop();CHKERRQ(ierr);
186         if (B_ierr) {
187           if (B_ierr < 0 ) {
188             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
189           } else if (B_ierr <= B_N) {
190             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
191           } else {
192             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
193           }
194         }
195 
196         if (B_neigs > nmax) {
197           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax);
198           B_neigs = nmax;
199         }
200 
201         nmin_s = PetscMin(nmin,B_N);
202         if (B_neigs < nmin_s) {
203           PetscBLASInt B_neigs2;
204 
205           B_IL = B_neigs + 1;
206           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
207           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
208           if (symmetric) {
209             PetscInt j;
210             for (j=0;j<subset_size;j++) {
211               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
212             }
213             for (j=0;j<subset_size;j++) {
214               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
215             }
216           } else {
217             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
218             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
219           }
220           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
221 #if defined(PETSC_USE_COMPLEX)
222           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
223 #else
224           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
225 #endif
226           ierr = PetscFPTrapPop();CHKERRQ(ierr);
227           B_neigs += B_neigs2;
228         }
229         if (B_ierr) {
230           if (B_ierr < 0 ) {
231             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
232           } else if (B_ierr <= B_N) {
233             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
234           } else {
235             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
236           }
237         }
238         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
239         for (j=0;j<B_neigs;j++) {
240           if (eigs[j] == 0.0) {
241             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
242           } else {
243             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
244           }
245         }
246       } else {
247           /* TODO */
248       }
249       maxneigs = PetscMax(B_neigs,maxneigs);
250       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
251 
252       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
253       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N));
254       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
255 #if 0
256       PetscInt ii;
257       for (ii=0;ii<B_neigs;ii++) {
258         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N);
259         for (j=0;j<B_N;j++) {
260           PetscPrintf(PETSC_COMM_SELF,"[%d]       %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]);
261         }
262       }
263 #endif
264       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
265       for (j=0;j<B_neigs;j++) {
266 #if 0
267         {
268           PetscBLASInt Blas_N,Blas_one = 1.0;
269           PetscScalar norm;
270           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
271           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
272           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
273             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
274           } else {
275             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
276           }
277           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
278         }
279 #endif
280         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
281         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
282         cum2 += subset_size;
283       }
284       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
285     }
286     /* shift for next computation */
287     cumarray += subset_size*subset_size;
288   }
289   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
290   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
291 
292   if (mss) {
293     if (pcbddc->use_deluxe_scaling) {
294       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
295     } else {
296       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
297     }
298     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
299   }
300   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
301 #if defined(PETSC_USE_COMPLEX)
302   ierr = PetscFree(rwork);CHKERRQ(ierr);
303 #endif
304   if (pcbddc->dbg_flag) {
305     PetscInt maxneigs_r;
306     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
307     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
308   }
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "PCBDDCSetUpSolvers"
314 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
315 {
316   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
317   PetscScalar    *coarse_submat_vals;
318   PetscErrorCode ierr;
319 
320   PetscFunctionBegin;
321   /* Setup local scatters R_to_B and (optionally) R_to_D */
322   /* PCBDDCSetUpLocalWorkVectors should be called first! */
323   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
324 
325   /* Setup local neumann solver ksp_R */
326   /* PCBDDCSetUpLocalScatters should be called first! */
327   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
328 
329   /* Change global null space passed in by the user if change of basis has been requested */
330   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
331     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
332   }
333 
334   /*
335      Setup local correction and local part of coarse basis.
336      Gives back the dense local part of the coarse matrix in column major ordering
337   */
338   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
339 
340   /* Compute total number of coarse nodes and setup coarse solver */
341   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
342 
343   /* free */
344   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "PCBDDCResetCustomization"
350 PetscErrorCode PCBDDCResetCustomization(PC pc)
351 {
352   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
357   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
358   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
359   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
360   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
361   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
362   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
363   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
364   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
365   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
366   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "PCBDDCResetTopography"
372 PetscErrorCode PCBDDCResetTopography(PC pc)
373 {
374   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
379   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
380   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
381   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
382   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
383   PetscFunctionReturn(0);
384 }
385 
386 #undef __FUNCT__
387 #define __FUNCT__ "PCBDDCResetSolvers"
388 PetscErrorCode PCBDDCResetSolvers(PC pc)
389 {
390   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
391   PetscScalar    *array;
392   PetscErrorCode ierr;
393 
394   PetscFunctionBegin;
395   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
396   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
397   ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
398   ierr = PetscFree(array);CHKERRQ(ierr);
399   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
400   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
403   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
404   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
405   if (pcbddc->local_auxmat2) {
406     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
407     ierr = PetscFree(array);CHKERRQ(ierr);
408   }
409   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
410   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
411   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
412   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
413   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
414   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
415   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
416   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
417   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
418   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
419   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
420   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
421   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
422   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
423   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
424   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
431 {
432   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
433   PC_IS          *pcis = (PC_IS*)pc->data;
434   VecType        impVecType;
435   PetscInt       n_constraints,n_R,old_size;
436   PetscErrorCode ierr;
437 
438   PetscFunctionBegin;
439   if (!pcbddc->ConstraintMatrix) {
440     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
441   }
442   /* get sizes */
443   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
444   n_R = pcis->n-pcbddc->n_actual_vertices;
445   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
446   /* local work vectors (try to avoid unneeded work)*/
447   /* R nodes */
448   old_size = -1;
449   if (pcbddc->vec1_R) {
450     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
451   }
452   if (n_R != old_size) {
453     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
454     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
455     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
456     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
457     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
458     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
459   }
460   /* local primal dofs */
461   old_size = -1;
462   if (pcbddc->vec1_P) {
463     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
464   }
465   if (pcbddc->local_primal_size != old_size) {
466     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
467     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
468     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
469     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
470   }
471   /* local explicit constraints */
472   old_size = -1;
473   if (pcbddc->vec1_C) {
474     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
475   }
476   if (n_constraints && n_constraints != old_size) {
477     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
481   }
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "PCBDDCSetUpCorrection"
487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
488 {
489   PetscErrorCode         ierr;
490   /* pointers to pcis and pcbddc */
491   PC_IS*                 pcis = (PC_IS*)pc->data;
492   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
493   /* submatrices of local problem */
494   Mat                    A_RV,A_VR,A_VV;
495   /* submatrices of local coarse problem */
496   Mat                    S_VV,S_CV,S_VC,S_CC;
497   /* working matrices */
498   Mat                    C_CR;
499   /* additional working stuff */
500   PC                     pc_R;
501   Mat                    F;
502   PetscBool              isLU,isCHOL,isILU;
503 
504   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
505   PetscScalar            *work;
506   PetscInt               *idx_V_B;
507   PetscInt               n,n_vertices,n_constraints;
508   PetscInt               i,n_R,n_D,n_B;
509   PetscBool              unsymmetric_check;
510   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
511   MatType                impMatType;
512   /* some shortcuts to scalars */
513   PetscScalar            one=1.0,m_one=-1.0;
514 
515   PetscFunctionBegin;
516   /* get number of vertices (corners plus constraints with change of basis)
517      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
518   n_vertices = pcbddc->n_actual_vertices;
519   n_constraints = pcbddc->local_primal_size-n_vertices;
520   /* Set Non-overlapping dimensions */
521   n_B = pcis->n_B; n_D = pcis->n - n_B;
522   n_R = pcis->n-n_vertices;
523 
524   /* Set types for local objects needed by BDDC precondtioner */
525   impMatType = MATSEQDENSE;
526 
527   /* vertices in boundary numbering */
528   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
529   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
530   if (i != n_vertices) {
531     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
532   }
533 
534   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
535   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
536   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
537   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
538   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
539   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
540   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
541   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
542   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
543   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
544 
545   unsymmetric_check = PETSC_FALSE;
546   /* allocate workspace */
547   n = 0;
548   if (n_constraints) {
549     n += n_R*n_constraints;
550   }
551   if (n_vertices) {
552     n = PetscMax(2*n_R*n_vertices,n);
553   }
554   if (!pcbddc->issym) {
555     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
556     unsymmetric_check = PETSC_TRUE;
557   }
558   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
559 
560   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
561   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
562   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
563   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
564   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
565   if (isLU || isILU || isCHOL) {
566     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
567   } else {
568     F = NULL;
569   }
570 
571   /* Precompute stuffs needed for preprocessing and application of BDDC*/
572   if (n_constraints) {
573     Mat M1,M2,M3;
574     IS  is_aux;
575     /* see if we can save some allocations */
576     if (pcbddc->local_auxmat2) {
577       PetscInt on_R,on_constraints;
578       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
579       if (on_R != n_R || on_constraints != n_constraints) {
580         PetscScalar *marray;
581 
582         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
583         ierr = PetscFree(marray);CHKERRQ(ierr);
584         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
585         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
586       }
587     }
588     /* auxiliary matrices */
589     if (!pcbddc->local_auxmat2) {
590       PetscScalar *marray;
591 
592       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
593       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
594       marray += n_R*n_constraints;
595       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
596     }
597 
598     /* Extract constraints on R nodes: C_{CR}  */
599     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
600     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
601     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
602 
603     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
604     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
605     for (i=0;i<n_constraints;i++) {
606       const PetscScalar *row_cmat_values;
607       const PetscInt    *row_cmat_indices;
608       PetscInt          size_of_constraint,j;
609 
610       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
611       for (j=0;j<size_of_constraint;j++) {
612         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
613       }
614       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
615     }
616     if (F) {
617       Mat B;
618 
619       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
620       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
621       ierr = MatDestroy(&B);CHKERRQ(ierr);
622     } else {
623       PetscScalar *xarray;
624       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
625       for (i=0;i<n_constraints;i++) {
626         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
627         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
628         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
629         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
630         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
631       }
632       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
633     }
634 
635     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
636     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
637     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
638     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
639     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
640     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
641     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
642     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
643     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
644     ierr = MatDestroy(&M2);CHKERRQ(ierr);
645     ierr = MatDestroy(&M3);CHKERRQ(ierr);
646     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
647     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
648     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
649     ierr = MatDestroy(&M1);CHKERRQ(ierr);
650   }
651   /* Get submatrices from subdomain matrix */
652   if (n_vertices) {
653     Mat       newmat;
654     IS        is_aux;
655     PetscInt  ibs,mbs;
656     PetscBool issbaij;
657 
658     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
659     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
660     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
661     if (ibs != mbs) { /* need to convert to SEQAIJ */
662       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
663       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
664       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
665       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
666       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
667     } else {
668       /* this is safe */
669       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
670       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
671       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
672         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
673         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
674         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
675         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
676         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
677       } else {
678         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
679         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
680       }
681     }
682     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
683   }
684 
685   /* Matrix of coarse basis functions (local) */
686   if (pcbddc->coarse_phi_B) {
687     PetscInt on_B,on_primal,on_D=n_D;
688     if (pcbddc->coarse_phi_D) {
689       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
690     }
691     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
692     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
693       PetscScalar *marray;
694 
695       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
696       ierr = PetscFree(marray);CHKERRQ(ierr);
697       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
698       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
699       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
700       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
701     }
702   }
703 
704   if (!pcbddc->coarse_phi_B) {
705     PetscScalar *marray;
706 
707     n = n_B*pcbddc->local_primal_size;
708     if (pcbddc->switch_static || pcbddc->dbg_flag) {
709       n += n_D*pcbddc->local_primal_size;
710     }
711     if (!pcbddc->issym) {
712       n *= 2;
713     }
714     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
715     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
716     n = n_B*pcbddc->local_primal_size;
717     if (pcbddc->switch_static || pcbddc->dbg_flag) {
718       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
719       n += n_D*pcbddc->local_primal_size;
720     }
721     if (!pcbddc->issym) {
722       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
723       if (pcbddc->switch_static || pcbddc->dbg_flag) {
724         n = n_B*pcbddc->local_primal_size;
725         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
726       }
727     } else {
728       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
729       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
730       if (pcbddc->switch_static || pcbddc->dbg_flag) {
731         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
732         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
733       }
734     }
735   }
736   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
737   /* vertices */
738   if (n_vertices) {
739 
740     if (n_R) {
741       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
742       PetscBLASInt B_N,B_one = 1;
743       PetscScalar  *x,*y;
744 
745       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
746       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
747       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
748       if (F) {
749         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
750       } else {
751         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
752         for (i=0;i<n_vertices;i++) {
753           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
754           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
755           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
756           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
757           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
758         }
759         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
760       }
761       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
762       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
763       if (n_constraints) {
764         Mat B;
765         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
766         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
767         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
768         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
769         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
770         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
771         ierr = MatDestroy(&B);CHKERRQ(ierr);
772       }
773       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
774       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
775       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
776       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
777       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
778       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
779       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
780       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
781       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
782       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
783       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
784       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
785     } else {
786       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
787       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
788     }
789     /* coarse basis functions */
790     for (i=0;i<n_vertices;i++) {
791       PetscScalar *y;
792 
793       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
794       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
795       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
796       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
797       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
798       y[n_B*i+idx_V_B[i]] = 1.0;
799       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
800       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
801 
802       if (pcbddc->switch_static || pcbddc->dbg_flag) {
803         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
804         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
805         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
806         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
807         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
808         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
809       }
810       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
811     }
812     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
813     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
814   }
815 
816   if (n_constraints) {
817     Mat B;
818 
819     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
820     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
821     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
822     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
823     if (n_vertices) {
824       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
825     }
826     ierr = MatDestroy(&B);CHKERRQ(ierr);
827     /* coarse basis functions */
828     for (i=0;i<n_constraints;i++) {
829       PetscScalar *y;
830 
831       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
832       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
833       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
834       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
836       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
837       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
838 
839       if (pcbddc->switch_static || pcbddc->dbg_flag) {
840         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
841         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
842         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
843         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
844         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
845         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
846       }
847       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
848     }
849   }
850 
851   /* compute other basis functions for non-symmetric problems */
852   if (!pcbddc->issym) {
853     Mat B,X;
854 
855     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
856 
857     if (n_constraints) {
858       Mat S_CCT,B_C;
859 
860       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
861       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
862       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
863       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
864       if (n_vertices) {
865         Mat B_V,S_VCT;
866 
867         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
868         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
869         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
870         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
871         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
872       }
873       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
874     }
875     if (n_vertices && n_R) {
876       Mat          A_VRT;
877       PetscBLASInt B_N,B_one = 1;
878 
879       if (!n_constraints) { /* if there are no constraints, reset work */
880         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
881       }
882       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
883       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
884       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
885       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
886       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
887     }
888 
889     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
890     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
891       for (i=0;i<pcbddc->local_primal_size;i++) {
892         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
893         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
894         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
895         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
896         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
897       }
898     } else {
899       for (i=0;i<pcbddc->local_primal_size;i++) {
900         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
901         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
902         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
903         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
904         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
905       }
906     }
907     ierr = MatDestroy(&B);CHKERRQ(ierr);
908     /* coarse basis functions */
909     for (i=0;i<pcbddc->local_primal_size;i++) {
910       PetscScalar *y;
911 
912       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
913       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
914       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
915       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917       if (i<n_vertices) {
918         y[n_B*i+idx_V_B[i]] = 1.0;
919       }
920       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
921       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
922 
923       if (pcbddc->switch_static || pcbddc->dbg_flag) {
924         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
925         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
926         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
927         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
929         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
930       }
931       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
932     }
933     ierr = MatDestroy(&X);CHKERRQ(ierr);
934   }
935 
936   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
937   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
938   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
939   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
940   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
941   /* Checking coarse_sub_mat and coarse basis functios */
942   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
943   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
944   if (pcbddc->dbg_flag) {
945     Mat         coarse_sub_mat;
946     Mat         AUXMAT,TM1,TM2,TM3,TM4;
947     Mat         coarse_phi_D,coarse_phi_B;
948     Mat         coarse_psi_D,coarse_psi_B;
949     Mat         A_II,A_BB,A_IB,A_BI;
950     MatType     checkmattype=MATSEQAIJ;
951     PetscReal   real_value;
952 
953     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
954     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
955     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
956     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
957     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
958     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
959     if (unsymmetric_check) {
960       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
961       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
962     }
963     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
964 
965     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
966     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
967     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
968     if (unsymmetric_check) {
969       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
970       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
971       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
972       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
973       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
974       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
975       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
976       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
977       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
978       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
979       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
980       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
981     } else {
982       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
983       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
984       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
985       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
986       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
987       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
988       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
989       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
990     }
991     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
992     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
993     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
994     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
995     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
996     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
997 
998     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
999     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1000     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1001     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1002     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1003     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1004     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1005     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1006     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1007     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1008     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1009     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1010     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1011     if (unsymmetric_check) {
1012       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1013       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1014     }
1015     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1016   }
1017 
1018   /* free memory */
1019   ierr = PetscFree(work);CHKERRQ(ierr);
1020   if (n_vertices) {
1021     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1022   }
1023   if (n_constraints) {
1024     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1025   }
1026   /* get back data */
1027   *coarse_submat_vals_n = coarse_submat_vals;
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1033 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B)
1034 {
1035   Mat            *work_mat;
1036   IS             isrow_s,iscol_s;
1037   PetscBool      rsorted,csorted;
1038   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1039   PetscErrorCode ierr;
1040 
1041   PetscFunctionBegin;
1042   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1043   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1044   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1045   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1046 
1047   if (!rsorted) {
1048     const PetscInt *idxs;
1049     PetscInt *idxs_sorted,i;
1050 
1051     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1052     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1053     for (i=0;i<rsize;i++) {
1054       idxs_perm_r[i] = i;
1055     }
1056     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1057     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1058     for (i=0;i<rsize;i++) {
1059       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1060     }
1061     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1062     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1063   } else {
1064     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1065     isrow_s = isrow;
1066   }
1067 
1068   if (!csorted) {
1069     if (isrow == iscol) {
1070       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1071       iscol_s = isrow_s;
1072     } else {
1073       const PetscInt *idxs;
1074       PetscInt *idxs_sorted,i;
1075 
1076       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1077       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1078       for (i=0;i<csize;i++) {
1079         idxs_perm_c[i] = i;
1080       }
1081       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1082       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1083       for (i=0;i<csize;i++) {
1084         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1085       }
1086       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1087       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1088     }
1089   } else {
1090     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1091     iscol_s = iscol;
1092   }
1093 
1094   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr);
1095 
1096   if (!rsorted || !csorted) {
1097     Mat      new_mat;
1098     IS       is_perm_r,is_perm_c;
1099 
1100     if (!rsorted) {
1101       PetscInt *idxs_r,i;
1102       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1103       for (i=0;i<rsize;i++) {
1104         idxs_r[idxs_perm_r[i]] = i;
1105       }
1106       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1107       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1108     } else {
1109       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1110     }
1111     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1112 
1113     if (!csorted) {
1114       if (isrow_s == iscol_s) {
1115         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1116         is_perm_c = is_perm_r;
1117       } else {
1118         PetscInt *idxs_c,i;
1119         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1120         for (i=0;i<csize;i++) {
1121           idxs_c[idxs_perm_c[i]] = i;
1122         }
1123         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1124         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1125       }
1126     } else {
1127       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1128     }
1129     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1130 
1131     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1132     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1133     work_mat[0] = new_mat;
1134     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1135     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1136   }
1137 
1138   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1139   *B = work_mat[0];
1140   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1141   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1142   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1148 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1149 {
1150   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1151   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1152   Mat            new_mat;
1153   IS             is_local,is_global;
1154   PetscInt       local_size;
1155   PetscBool      isseqaij;
1156   PetscErrorCode ierr;
1157 
1158   PetscFunctionBegin;
1159   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1160   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1161   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1162   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1163   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1164   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr);
1165   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1166 
1167   /* check */
1168   if (pcbddc->dbg_flag) {
1169     Vec       x,x_change;
1170     PetscReal error;
1171 
1172     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1173     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1174     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1175     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1176     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1177     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1178     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1179     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1180     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1181     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1182     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1183     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1184     ierr = VecDestroy(&x);CHKERRQ(ierr);
1185     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1186   }
1187 
1188   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1189   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1190   if (isseqaij) {
1191     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1192   } else {
1193     Mat work_mat;
1194     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1195     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1196     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1197   }
1198   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1199   /*
1200   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1201   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1202   */
1203   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1209 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1210 {
1211   PC_IS*         pcis = (PC_IS*)(pc->data);
1212   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1213   IS             is_aux1,is_aux2;
1214   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1215   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1216   PetscInt       vbs,bs;
1217   PetscBT        bitmask;
1218   PetscErrorCode ierr;
1219 
1220   PetscFunctionBegin;
1221   /*
1222     No need to setup local scatters if
1223       - primal space is unchanged
1224         AND
1225       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1226         AND
1227       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1228   */
1229   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1230     PetscFunctionReturn(0);
1231   }
1232   /* destroy old objects */
1233   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1234   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1235   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1236   /* Set Non-overlapping dimensions */
1237   n_B = pcis->n_B; n_D = pcis->n - n_B;
1238   n_vertices = pcbddc->n_actual_vertices;
1239   /* create auxiliary bitmask */
1240   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1241   for (i=0;i<n_vertices;i++) {
1242     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1243   }
1244 
1245   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1246   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1247   for (i=0, n_R=0; i<pcis->n; i++) {
1248     if (!PetscBTLookup(bitmask,i)) {
1249       idx_R_local[n_R] = i;
1250       n_R++;
1251     }
1252   }
1253 
1254   /* Block code */
1255   vbs = 1;
1256   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1257   if (bs>1 && !(n_vertices%bs)) {
1258     PetscBool is_blocked = PETSC_TRUE;
1259     PetscInt  *vary;
1260     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1261     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1262     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1263     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1264     for (i=0; i<n_vertices; i++) {
1265       if (vary[i]!=0 && vary[i]!=bs) {
1266         is_blocked = PETSC_FALSE;
1267         break;
1268       }
1269     }
1270     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1271       vbs = bs;
1272       for (i=0;i<n_R/vbs;i++) {
1273         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1274       }
1275     }
1276     ierr = PetscFree(vary);CHKERRQ(ierr);
1277   }
1278   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1279   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1280 
1281   /* print some info if requested */
1282   if (pcbddc->dbg_flag) {
1283     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1284     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1285     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1286     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1287     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1288     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1289     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1290     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1291   }
1292 
1293   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1294   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1295   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1296   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1297   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1298   for (i=0; i<n_D; i++) {
1299     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1300   }
1301   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1302   for (i=0, j=0; i<n_R; i++) {
1303     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1304       aux_array1[j++] = i;
1305     }
1306   }
1307   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1308   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1309   for (i=0, j=0; i<n_B; i++) {
1310     if (!PetscBTLookup(bitmask,is_indices[i])) {
1311       aux_array2[j++] = i;
1312     }
1313   }
1314   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1315   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1316   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1317   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1318   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1319 
1320   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1321     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1322     for (i=0, j=0; i<n_R; i++) {
1323       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1324         aux_array1[j++] = i;
1325       }
1326     }
1327     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1328     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1329     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1330   }
1331   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1332   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1339 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1340 {
1341   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1342   PC_IS          *pcis = (PC_IS*)pc->data;
1343   PC             pc_temp;
1344   Mat            A_RR;
1345   MatReuse       reuse;
1346   PetscScalar    m_one = -1.0;
1347   PetscReal      value;
1348   PetscInt       n_D,n_R,ibs,mbs;
1349   PetscBool      use_exact,use_exact_reduced,issbaij;
1350   PetscErrorCode ierr;
1351   /* prefixes stuff */
1352   char           dir_prefix[256],neu_prefix[256],str_level[16];
1353   size_t         len;
1354 
1355   PetscFunctionBegin;
1356 
1357   /* compute prefixes */
1358   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1359   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1360   if (!pcbddc->current_level) {
1361     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1362     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1363     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1364     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1367     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1368     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1369     len -= 15; /* remove "pc_bddc_coarse_" */
1370     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1371     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1372     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1373     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1374     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1375     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1376     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1377     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1378   }
1379 
1380   /* DIRICHLET PROBLEM */
1381   if (dirichlet) {
1382     /* Matrix for Dirichlet problem is pcis->A_II */
1383     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1384     if (!pcbddc->ksp_D) { /* create object if not yet build */
1385       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1386       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1387       /* default */
1388       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1389       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1390       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1391       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1392       if (issbaij) {
1393         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1394       } else {
1395         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1396       }
1397       /* Allow user's customization */
1398       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1399       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1400     }
1401     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1402     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1403     if (!n_D) {
1404       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1405       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1406     }
1407     /* Set Up KSP for Dirichlet problem of BDDC */
1408     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1409     /* set ksp_D into pcis data */
1410     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1411     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1412     pcis->ksp_D = pcbddc->ksp_D;
1413   }
1414 
1415   /* NEUMANN PROBLEM */
1416   A_RR = 0;
1417   if (neumann) {
1418     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1419     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1420     if (pcbddc->ksp_R) { /* already created ksp */
1421       PetscInt nn_R;
1422       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1423       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1424       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1425       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1426         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1427         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1428         reuse = MAT_INITIAL_MATRIX;
1429       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1430         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1431           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1432           reuse = MAT_INITIAL_MATRIX;
1433         } else { /* safe to reuse the matrix */
1434           reuse = MAT_REUSE_MATRIX;
1435         }
1436       }
1437       /* last check */
1438       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1439         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1440         reuse = MAT_INITIAL_MATRIX;
1441       }
1442     } else { /* first time, so we need to create the matrix */
1443       reuse = MAT_INITIAL_MATRIX;
1444     }
1445     /* extract A_RR */
1446     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1447     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1448     if (ibs != mbs) {
1449       Mat newmat;
1450       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1451       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1452       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1453     } else {
1454       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1455     }
1456     if (!pcbddc->ksp_R) { /* create object if not present */
1457       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1458       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1459       /* default */
1460       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1461       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1462       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1463       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1464       if (issbaij) {
1465         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1466       } else {
1467         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1468       }
1469       /* Allow user's customization */
1470       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1471       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1472     }
1473     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1474     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1475     if (!n_R) {
1476       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1477       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1478     }
1479     /* Set Up KSP for Neumann problem of BDDC */
1480     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1481   }
1482 
1483   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1484   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1485     if (pcbddc->dbg_flag) {
1486       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1487       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1488       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1489     }
1490     if (dirichlet) { /* Dirichlet */
1491       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1492       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1493       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1494       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1495       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1496       /* need to be adapted? */
1497       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1498       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1499       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1500       /* print info */
1501       if (pcbddc->dbg_flag) {
1502         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1503         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1504       }
1505       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1506         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1507       }
1508     }
1509     if (neumann) { /* Neumann */
1510       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1511       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1512       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1513       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1514       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1515       /* need to be adapted? */
1516       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1517       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1518       /* print info */
1519       if (pcbddc->dbg_flag) {
1520         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1521         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1522       }
1523       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1524         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1525       }
1526     }
1527   }
1528   /* free Neumann problem's matrix */
1529   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1535 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1536 {
1537   PetscErrorCode ierr;
1538   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1539 
1540   PetscFunctionBegin;
1541   if (applytranspose) {
1542     if (pcbddc->local_auxmat1) {
1543       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1544       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1545     }
1546     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1547   } else {
1548     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1549     if (pcbddc->local_auxmat1) {
1550       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1551       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1552     }
1553   }
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1558 #undef __FUNCT__
1559 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1560 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1561 {
1562   PetscErrorCode ierr;
1563   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1564   PC_IS*            pcis = (PC_IS*)  (pc->data);
1565   const PetscScalar zero = 0.0;
1566 
1567   PetscFunctionBegin;
1568   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1569   if (applytranspose) {
1570     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1571     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1572   } else {
1573     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1574     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1575   }
1576   /* start communications from local primal nodes to rhs of coarse solver */
1577   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1578   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1580 
1581   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1582   /* TODO remove null space when doing multilevel */
1583   if (pcbddc->coarse_ksp) {
1584     if (applytranspose) {
1585       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1586     } else {
1587       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1588     }
1589   }
1590 
1591   /* Local solution on R nodes */
1592   if (pcis->n) {
1593     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1594     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1595     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1596     if (pcbddc->switch_static) {
1597       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1598       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1599     }
1600     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1601     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1602     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1603     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604     if (pcbddc->switch_static) {
1605       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1607     }
1608   }
1609 
1610   /* communications from coarse sol to local primal nodes */
1611   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1612   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1613 
1614   /* Sum contributions from two levels */
1615   if (applytranspose) {
1616     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1617     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1618   } else {
1619     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1620     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1621   }
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1626 #undef __FUNCT__
1627 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1628 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1629 {
1630   PetscErrorCode ierr;
1631   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1632   PetscScalar    *array,*array2;
1633   Vec            from,to;
1634 
1635   PetscFunctionBegin;
1636   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1637     from = pcbddc->coarse_vec;
1638     to = pcbddc->vec1_P;
1639     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1640       Vec tvec;
1641       PetscInt lsize;
1642       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1643       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1644       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1645       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1646       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1647       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1648       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1649     }
1650   } else { /* from local to global -> put data in coarse right hand side */
1651     from = pcbddc->vec1_P;
1652     to = pcbddc->coarse_vec;
1653   }
1654   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 #undef __FUNCT__
1659 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1660 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1661 {
1662   PetscErrorCode ierr;
1663   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1664   PetscScalar    *array,*array2;
1665   Vec            from,to;
1666 
1667   PetscFunctionBegin;
1668   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1669     from = pcbddc->coarse_vec;
1670     to = pcbddc->vec1_P;
1671   } else { /* from local to global -> put data in coarse right hand side */
1672     from = pcbddc->vec1_P;
1673     to = pcbddc->coarse_vec;
1674   }
1675   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1676   if (smode == SCATTER_FORWARD) {
1677     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1678       Vec tvec;
1679       PetscInt lsize;
1680       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1681       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1682       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1683       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1684       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1685       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1686       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1687     }
1688   }
1689   PetscFunctionReturn(0);
1690 }
1691 
1692 /* uncomment for testing purposes */
1693 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1694 #undef __FUNCT__
1695 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1696 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1697 {
1698   PetscErrorCode    ierr;
1699   PC_IS*            pcis = (PC_IS*)(pc->data);
1700   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1701   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1702   /* one and zero */
1703   PetscScalar       one=1.0,zero=0.0;
1704   /* space to store constraints and their local indices */
1705   PetscScalar       *temp_quadrature_constraint;
1706   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1707   /* iterators */
1708   PetscInt          i,j,k,total_counts,temp_start_ptr;
1709   /* stuff to store connected components stored in pcbddc->mat_graph */
1710   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1711   PetscInt          n_ISForFaces,n_ISForEdges;
1712   /* near null space stuff */
1713   MatNullSpace      nearnullsp;
1714   const Vec         *nearnullvecs;
1715   Vec               *localnearnullsp;
1716   PetscBool         nnsp_has_cnst;
1717   PetscInt          nnsp_size;
1718   PetscScalar       *array;
1719   /* BLAS integers */
1720   PetscBLASInt      lwork,lierr;
1721   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1722   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1723   /* LAPACK working arrays for SVD or POD */
1724   PetscBool         skip_lapack;
1725   PetscScalar       *work;
1726   PetscReal         *singular_vals;
1727 #if defined(PETSC_USE_COMPLEX)
1728   PetscReal         *rwork;
1729 #endif
1730 #if defined(PETSC_MISSING_LAPACK_GESVD)
1731   PetscBLASInt      Blas_one_2=1;
1732   PetscScalar       *temp_basis,*correlation_mat;
1733 #else
1734   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1735   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1736 #endif
1737   /* reuse */
1738   PetscInt          olocal_primal_size;
1739   PetscInt          *oprimal_indices_local_idxs;
1740   /* change of basis */
1741   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1742   PetscBool         boolforchange,qr_needed;
1743   PetscBT           touched,change_basis,qr_needed_idx;
1744   /* auxiliary stuff */
1745   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1746   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1747   PetscScalar       *temp_quadrature_constraint_work=NULL;
1748   /* some quantities */
1749   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1750   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1751 
1752 
1753   PetscFunctionBegin;
1754   /* Destroy Mat objects computed previously */
1755   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1756   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1757   /* TODO synch with adaptive selection */
1758   /* Get index sets for faces, edges and vertices from graph */
1759   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1760   /* free unneeded index sets */
1761   if (!pcbddc->use_vertices) {
1762     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1763   }
1764   if (!pcbddc->use_edges) {
1765     for (i=0;i<n_ISForEdges;i++) {
1766       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1767     }
1768     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1769     n_ISForEdges = 0;
1770   }
1771   if (!pcbddc->use_faces) {
1772     for (i=0;i<n_ISForFaces;i++) {
1773       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1774     }
1775     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1776     n_ISForFaces = 0;
1777   }
1778   /* HACKS (the following two blocks of code) */
1779   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1780     pcbddc->use_change_of_basis = PETSC_TRUE;
1781     if (!ISForEdges) {
1782       pcbddc->use_change_on_faces = PETSC_TRUE;
1783     }
1784   }
1785   if (pcbddc->NullSpace) {
1786     /* use_change_of_basis should be consistent among processors */
1787     PetscBool tbool[2],gbool[2];
1788     tbool [0] = pcbddc->use_change_of_basis;
1789     tbool [1] = pcbddc->use_change_on_faces;
1790     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1791     pcbddc->use_change_of_basis = gbool[0];
1792     pcbddc->use_change_on_faces = gbool[1];
1793   }
1794   /* print some info */
1795   if (pcbddc->dbg_flag) {
1796     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1797     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1798     i = 0;
1799     if (ISForVertices) {
1800       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1801     }
1802     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1803     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1804     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1805     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1806   }
1807 
1808   if (!pcbddc->adaptive_selection) {
1809     /* check if near null space is attached to global mat */
1810     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1811     if (nearnullsp) {
1812       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1813       /* remove any stored info */
1814       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1815       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1816       /* store information for BDDC solver reuse */
1817       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1818       pcbddc->onearnullspace = nearnullsp;
1819       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1820       for (i=0;i<nnsp_size;i++) {
1821         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1822       }
1823     } else { /* if near null space is not provided BDDC uses constants by default */
1824       nnsp_size = 0;
1825       nnsp_has_cnst = PETSC_TRUE;
1826     }
1827     /* get max number of constraints on a single cc */
1828     max_constraints = nnsp_size;
1829     if (nnsp_has_cnst) max_constraints++;
1830 
1831     /*
1832          Evaluate maximum storage size needed by the procedure
1833          - temp_indices will contain start index of each constraint stored as follows
1834          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1835          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1836          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1837                                                                                                                                                            */
1838     total_counts = n_ISForFaces+n_ISForEdges;
1839     total_counts *= max_constraints;
1840     n_vertices = 0;
1841     if (ISForVertices) {
1842       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1843     }
1844     total_counts += n_vertices;
1845     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1846     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1847     total_counts = 0;
1848     max_size_of_constraint = 0;
1849     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1850       if (i<n_ISForEdges) {
1851         used_IS = &ISForEdges[i];
1852       } else {
1853         used_IS = &ISForFaces[i-n_ISForEdges];
1854       }
1855       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1856       total_counts += j;
1857       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1858     }
1859     total_counts *= max_constraints;
1860     total_counts += n_vertices;
1861     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1862     /* get local part of global near null space vectors */
1863     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1864     for (k=0;k<nnsp_size;k++) {
1865       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1866       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1867       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1868     }
1869 
1870     /* whether or not to skip lapack calls */
1871     skip_lapack = PETSC_TRUE;
1872     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1873 
1874     /* allocate some auxiliary stuff */
1875     if (!skip_lapack || pcbddc->use_qr_single) {
1876       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1877     }
1878 
1879     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1880     if (!skip_lapack) {
1881       PetscScalar temp_work;
1882 
1883 #if defined(PETSC_MISSING_LAPACK_GESVD)
1884       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1885       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1886       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1887       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1888 #if defined(PETSC_USE_COMPLEX)
1889       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1890 #endif
1891       /* now we evaluate the optimal workspace using query with lwork=-1 */
1892       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1893       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1894       lwork = -1;
1895       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1896 #if !defined(PETSC_USE_COMPLEX)
1897       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1898 #else
1899       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1900 #endif
1901       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1902       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1903 #else /* on missing GESVD */
1904       /* SVD */
1905       PetscInt max_n,min_n;
1906       max_n = max_size_of_constraint;
1907       min_n = max_constraints;
1908       if (max_size_of_constraint < max_constraints) {
1909         min_n = max_size_of_constraint;
1910         max_n = max_constraints;
1911       }
1912       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1913 #if defined(PETSC_USE_COMPLEX)
1914       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1915 #endif
1916       /* now we evaluate the optimal workspace using query with lwork=-1 */
1917       lwork = -1;
1918       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1919       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1920       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1921       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1922 #if !defined(PETSC_USE_COMPLEX)
1923       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1924 #else
1925       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1926 #endif
1927       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1928       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1929 #endif /* on missing GESVD */
1930       /* Allocate optimal workspace */
1931       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1932       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1933     }
1934     /* Now we can loop on constraining sets */
1935     total_counts = 0;
1936     temp_indices[0] = 0;
1937     /* vertices */
1938     if (ISForVertices) {
1939       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1940       if (nnsp_has_cnst) { /* consider all vertices */
1941         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1942         for (i=0;i<n_vertices;i++) {
1943           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1944           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1945           total_counts++;
1946         }
1947       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1948         PetscBool used_vertex;
1949         for (i=0;i<n_vertices;i++) {
1950           used_vertex = PETSC_FALSE;
1951           k = 0;
1952           while (!used_vertex && k<nnsp_size) {
1953             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1954             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1955               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1956               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1957               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1958               total_counts++;
1959               used_vertex = PETSC_TRUE;
1960             }
1961             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1962             k++;
1963           }
1964         }
1965       }
1966       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1967       n_vertices = total_counts;
1968     }
1969 
1970     /* edges and faces */
1971     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1972       if (ncc<n_ISForEdges) {
1973         used_IS = &ISForEdges[ncc];
1974         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1975       } else {
1976         used_IS = &ISForFaces[ncc-n_ISForEdges];
1977         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1978       }
1979       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1980       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1981       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1982       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1983       /* change of basis should not be performed on local periodic nodes */
1984       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1985       if (nnsp_has_cnst) {
1986         PetscScalar quad_value;
1987         temp_constraints++;
1988         if (!pcbddc->use_nnsp_true) {
1989           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1990         } else {
1991           quad_value = 1.0;
1992         }
1993         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1994         for (j=0;j<size_of_constraint;j++) {
1995           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1996         }
1997         /* sort by global ordering if using lapack subroutines (not needed!) */
1998         if (!skip_lapack || pcbddc->use_qr_single) {
1999           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2000           for (j=0;j<size_of_constraint;j++) {
2001             permutation[j]=j;
2002           }
2003           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2004           for (j=0;j<size_of_constraint;j++) {
2005             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2006           }
2007           for (j=0;j<size_of_constraint;j++) {
2008             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2009             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2010           }
2011           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2012           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2013         }
2014         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2015         total_counts++;
2016       }
2017       for (k=0;k<nnsp_size;k++) {
2018         PetscReal real_value;
2019         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2020         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2021         for (j=0;j<size_of_constraint;j++) {
2022           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2023         }
2024         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2025         /* check if array is null on the connected component */
2026         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2027         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2028         if (real_value > 0.0) { /* keep indices and values */
2029           /* sort by global ordering if using lapack subroutines */
2030           if (!skip_lapack || pcbddc->use_qr_single) {
2031             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2032             for (j=0;j<size_of_constraint;j++) {
2033               permutation[j]=j;
2034             }
2035             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2036             for (j=0;j<size_of_constraint;j++) {
2037               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2038               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2039             }
2040             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2041             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2042           }
2043           temp_constraints++;
2044           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2045           total_counts++;
2046         }
2047       }
2048       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2049       valid_constraints = temp_constraints;
2050       if (!pcbddc->use_nnsp_true && temp_constraints) {
2051         if (temp_constraints == 1) { /* just normalize the constraint */
2052           PetscScalar norm;
2053           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2054           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2055           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2056           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2057         } else { /* perform SVD */
2058           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2059 
2060 #if defined(PETSC_MISSING_LAPACK_GESVD)
2061           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2062              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2063              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2064                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2065                 from that computed using LAPACKgesvd
2066              -> This is due to a different computation of eigenvectors in LAPACKheev
2067              -> The quality of the POD-computed basis will be the same */
2068           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2069           /* Store upper triangular part of correlation matrix */
2070           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2071           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2072           for (j=0;j<temp_constraints;j++) {
2073             for (k=0;k<j+1;k++) {
2074               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
2075             }
2076           }
2077           /* compute eigenvalues and eigenvectors of correlation matrix */
2078           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2079           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2080 #if !defined(PETSC_USE_COMPLEX)
2081           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2082 #else
2083           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2084 #endif
2085           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2086           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2087           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2088           j = 0;
2089           while (j < temp_constraints && singular_vals[j] < tol) j++;
2090           total_counts = total_counts-j;
2091           valid_constraints = temp_constraints-j;
2092           /* scale and copy POD basis into used quadrature memory */
2093           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2094           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2095           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2096           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2097           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2098           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2099           if (j<temp_constraints) {
2100             PetscInt ii;
2101             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2102             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2103             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2104             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2105             for (k=0;k<temp_constraints-j;k++) {
2106               for (ii=0;ii<size_of_constraint;ii++) {
2107                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2108               }
2109             }
2110           }
2111 #else  /* on missing GESVD */
2112           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2113           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2114           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2115           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2116 #if !defined(PETSC_USE_COMPLEX)
2117           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
2118 #else
2119           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
2120 #endif
2121           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2122           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2123           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2124           k = temp_constraints;
2125           if (k > size_of_constraint) k = size_of_constraint;
2126           j = 0;
2127           while (j < k && singular_vals[k-j-1] < tol) j++;
2128           valid_constraints = k-j;
2129           total_counts = total_counts-temp_constraints+valid_constraints;
2130 #endif /* on missing GESVD */
2131         }
2132       }
2133       /* setting change_of_basis flag is safe now */
2134       if (boolforchange) {
2135         for (j=0;j<valid_constraints;j++) {
2136           PetscBTSet(change_basis,total_counts-j-1);
2137         }
2138       }
2139     }
2140     /* free workspace */
2141     if (!skip_lapack || pcbddc->use_qr_single) {
2142       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2143     }
2144     if (!skip_lapack) {
2145       ierr = PetscFree(work);CHKERRQ(ierr);
2146 #if defined(PETSC_USE_COMPLEX)
2147       ierr = PetscFree(rwork);CHKERRQ(ierr);
2148 #endif
2149       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2150 #if defined(PETSC_MISSING_LAPACK_GESVD)
2151       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2152       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2153 #endif
2154     }
2155     for (k=0;k<nnsp_size;k++) {
2156       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2157     }
2158     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2159   } else {
2160     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2161     PetscInt        cum = 0;
2162 
2163     total_counts = 0;
2164     n_vertices = 0;
2165     if (sub_schurs->is_Ej_com) {
2166       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2167     }
2168     max_constraints = 0;
2169     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2170       total_counts += pcbddc->adaptive_constraints_n[i];
2171       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2172     }
2173     temp_indices = pcbddc->adaptive_constraints_ptrs;
2174     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2175     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2176 
2177 #if 0
2178     printf("Found %d totals\n",total_counts);
2179     for (i=0;i<total_counts;i++) {
2180       printf("const %d, start %d",i,temp_indices[i]);
2181       printf(" end %d:\n",temp_indices[i+1]);
2182       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2183         printf("  idxs %d",temp_indices_to_constraint[j]);
2184         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2185       }
2186     }
2187     for (i=0;i<n_vertices;i++) {
2188       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2189     }
2190     for (i=0;i<sub_schurs->n_subs;i++) {
2191       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2192     }
2193 #endif
2194 
2195     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2196     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2197     /* Change of basis */
2198     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2199     if (pcbddc->use_change_of_basis) {
2200       cum = n_vertices;
2201       for (i=0;i<sub_schurs->n_subs;i++) {
2202         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2203           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2204             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2205           }
2206         }
2207         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2208       }
2209     }
2210   }
2211 
2212   /* free index sets of faces, edges and vertices */
2213   for (i=0;i<n_ISForFaces;i++) {
2214     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2215   }
2216   if (n_ISForFaces) {
2217     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2218   }
2219   for (i=0;i<n_ISForEdges;i++) {
2220     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2221   }
2222   if (n_ISForEdges) {
2223     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2224   }
2225   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2226 
2227   /* map temp_indices_to_constraint in boundary numbering */
2228   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2229   if (i != temp_indices[total_counts]) {
2230     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2231   }
2232 
2233   /* set quantities in pcbddc data structure and store previous primal size */
2234   /* n_vertices defines the number of subdomain corners in the primal space */
2235   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2236   olocal_primal_size = pcbddc->local_primal_size;
2237   pcbddc->local_primal_size = total_counts;
2238   pcbddc->n_vertices = n_vertices;
2239   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2240 
2241   /* Create constraint matrix */
2242   /* The constraint matrix is used to compute the l2g map of primal dofs */
2243   /* so we need to set it up properly either with or without change of basis */
2244   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2245   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2246   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2247   /* array to compute a local numbering of constraints : vertices first then constraints */
2248   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2249   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2250   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2251   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2252   /* auxiliary stuff for basis change */
2253   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2254   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2255 
2256   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2257   total_primal_vertices=0;
2258   for (i=0;i<pcbddc->local_primal_size;i++) {
2259     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2260     if (size_of_constraint == 1) {
2261       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2262       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2263       aux_primal_minloc[total_primal_vertices]=0;
2264       total_primal_vertices++;
2265     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2266       PetscInt min_loc,min_index;
2267       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2268       /* find first untouched local node */
2269       k = 0;
2270       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2271       min_index = global_indices[k];
2272       min_loc = k;
2273       /* search the minimum among global nodes already untouched on the cc */
2274       for (k=1;k<size_of_constraint;k++) {
2275         /* there can be more than one constraint on a single connected component */
2276         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2277           min_index = global_indices[k];
2278           min_loc = k;
2279         }
2280       }
2281       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2282       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2283       aux_primal_minloc[total_primal_vertices]=min_loc;
2284       total_primal_vertices++;
2285     }
2286   }
2287   /* determine if a QR strategy is needed for change of basis */
2288   qr_needed = PETSC_FALSE;
2289   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2290   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2291     if (PetscBTLookup(change_basis,i)) {
2292       if (!pcbddc->use_qr_single) {
2293         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2294         j = 0;
2295         for (k=0;k<size_of_constraint;k++) {
2296           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2297             j++;
2298           }
2299         }
2300         /* found more than one primal dof on the cc */
2301         if (j > 1) {
2302           PetscBTSet(qr_needed_idx,i);
2303           qr_needed = PETSC_TRUE;
2304         }
2305       } else {
2306         PetscBTSet(qr_needed_idx,i);
2307         qr_needed = PETSC_TRUE;
2308       }
2309     }
2310   }
2311   /* free workspace */
2312   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2313 
2314   /* permute indices in order to have a sorted set of vertices */
2315   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2316 
2317   /* nonzero structure of constraint matrix */
2318   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2319   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2320   j=total_primal_vertices;
2321   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2322     if (!PetscBTLookup(change_basis,i)) {
2323       nnz[j]=temp_indices[i+1]-temp_indices[i];
2324       j++;
2325     }
2326   }
2327   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2328   ierr = PetscFree(nnz);CHKERRQ(ierr);
2329   /* set values in constraint matrix */
2330   for (i=0;i<total_primal_vertices;i++) {
2331     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2332   }
2333   total_counts = total_primal_vertices;
2334   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2335     if (!PetscBTLookup(change_basis,i)) {
2336       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2337       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2338       total_counts++;
2339     }
2340   }
2341   /* assembling */
2342   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2343   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2344   /*
2345   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2346   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2347   */
2348   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2349   if (pcbddc->use_change_of_basis) {
2350     /* dual and primal dofs on a single cc */
2351     PetscInt     dual_dofs,primal_dofs;
2352     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2353     PetscInt     primal_counter;
2354     /* working stuff for GEQRF */
2355     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2356     PetscBLASInt lqr_work;
2357     /* working stuff for UNGQR */
2358     PetscScalar  *gqr_work,lgqr_work_t;
2359     PetscBLASInt lgqr_work;
2360     /* working stuff for TRTRS */
2361     PetscScalar  *trs_rhs;
2362     PetscBLASInt Blas_NRHS;
2363     /* pointers for values insertion into change of basis matrix */
2364     PetscInt     *start_rows,*start_cols;
2365     PetscScalar  *start_vals;
2366     /* working stuff for values insertion */
2367     PetscBT      is_primal;
2368     /* matrix sizes */
2369     PetscInt     global_size,local_size;
2370     /* temporary change of basis */
2371     Mat          localChangeOfBasisMatrix;
2372 
2373     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2374     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2375     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2376     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2377     /* nonzeros for local mat */
2378     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2379     for (i=0;i<pcis->n;i++) nnz[i]=1;
2380     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2381       if (PetscBTLookup(change_basis,i)) {
2382         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2383         if (PetscBTLookup(qr_needed_idx,i)) {
2384           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2385         } else {
2386           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2387           /* get local primal index on the cc */
2388           j = 0;
2389           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2390           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2391         }
2392       }
2393     }
2394     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2395     ierr = PetscFree(nnz);CHKERRQ(ierr);
2396     /* Set initial identity in the matrix */
2397     for (i=0;i<pcis->n;i++) {
2398       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2399     }
2400 
2401     if (pcbddc->dbg_flag) {
2402       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2403       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2404     }
2405 
2406 
2407     /* Now we loop on the constraints which need a change of basis */
2408     /*
2409        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2410        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2411 
2412        Basic blocks of change of basis matrix T computed by
2413 
2414           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2415 
2416             | 1        0   ...        0         s_1/S |
2417             | 0        1   ...        0         s_2/S |
2418             |              ...                        |
2419             | 0        ...            1     s_{n-1}/S |
2420             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2421 
2422             with S = \sum_{i=1}^n s_i^2
2423             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2424                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2425 
2426           - QR decomposition of constraints otherwise
2427     */
2428     if (qr_needed) {
2429       /* space to store Q */
2430       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2431       /* first we issue queries for optimal work */
2432       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2433       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2434       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2435       lqr_work = -1;
2436       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2437       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2438       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2439       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2440       lgqr_work = -1;
2441       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2442       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2443       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2444       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2445       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2446       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2447       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2448       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2449       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2450       /* array to store scaling factors for reflectors */
2451       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2452       /* array to store rhs and solution of triangular solver */
2453       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2454       /* allocating workspace for check */
2455       if (pcbddc->dbg_flag) {
2456         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2457       }
2458     }
2459     /* array to store whether a node is primal or not */
2460     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2461     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2462     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2463     if (i != total_primal_vertices) {
2464       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2465     }
2466     for (i=0;i<total_primal_vertices;i++) {
2467       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2468     }
2469     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2470 
2471     /* loop on constraints and see whether or not they need a change of basis and compute it */
2472     /* -> using implicit ordering contained in temp_indices data */
2473     total_counts = pcbddc->n_vertices;
2474     primal_counter = total_counts;
2475     while (total_counts<pcbddc->local_primal_size) {
2476       primal_dofs = 1;
2477       if (PetscBTLookup(change_basis,total_counts)) {
2478         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2479         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2480           primal_dofs++;
2481         }
2482         /* get constraint info */
2483         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2484         dual_dofs = size_of_constraint-primal_dofs;
2485 
2486         if (pcbddc->dbg_flag) {
2487           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2488         }
2489 
2490         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2491 
2492           /* copy quadrature constraints for change of basis check */
2493           if (pcbddc->dbg_flag) {
2494             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2495           }
2496           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2497           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2498 
2499           /* compute QR decomposition of constraints */
2500           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2501           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2502           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2503           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2504           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2505           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2506           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2507 
2508           /* explictly compute R^-T */
2509           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2510           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2511           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2512           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2513           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2514           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2515           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2516           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2517           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2518           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2519 
2520           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2521           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2522           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2523           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2524           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2525           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2526           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2527           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2528           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2529 
2530           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2531              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2532              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2533           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2534           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2535           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2536           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2537           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2538           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2539           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2540           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2541           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2542           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2543 
2544           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2545           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2546           /* insert cols for primal dofs */
2547           for (j=0;j<primal_dofs;j++) {
2548             start_vals = &qr_basis[j*size_of_constraint];
2549             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2550             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2551           }
2552           /* insert cols for dual dofs */
2553           for (j=0,k=0;j<dual_dofs;k++) {
2554             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2555               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2556               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2557               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2558               j++;
2559             }
2560           }
2561 
2562           /* check change of basis */
2563           if (pcbddc->dbg_flag) {
2564             PetscInt   ii,jj;
2565             PetscBool valid_qr=PETSC_TRUE;
2566             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2567             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2568             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2569             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2570             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2571             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2572             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2573             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2574             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2575             for (jj=0;jj<size_of_constraint;jj++) {
2576               for (ii=0;ii<primal_dofs;ii++) {
2577                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2578                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2579               }
2580             }
2581             if (!valid_qr) {
2582               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2583               for (jj=0;jj<size_of_constraint;jj++) {
2584                 for (ii=0;ii<primal_dofs;ii++) {
2585                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2586                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2587                   }
2588                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2589                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2590                   }
2591                 }
2592               }
2593             } else {
2594               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2595             }
2596           }
2597         } else { /* simple transformation block */
2598           PetscInt    row,col;
2599           PetscScalar val,norm;
2600 
2601           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2602           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2603           for (j=0;j<size_of_constraint;j++) {
2604             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2605             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2606             if (!PetscBTLookup(is_primal,row_B)) {
2607               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2608               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2609               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2610             } else {
2611               for (k=0;k<size_of_constraint;k++) {
2612                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2613                 if (row != col) {
2614                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2615                 } else {
2616                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2617                 }
2618                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2619               }
2620             }
2621           }
2622           if (pcbddc->dbg_flag) {
2623             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2624           }
2625         }
2626         /* increment primal counter */
2627         primal_counter += primal_dofs;
2628       } else {
2629         if (pcbddc->dbg_flag) {
2630           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2631         }
2632       }
2633       /* increment constraint counter total_counts */
2634       total_counts += primal_dofs;
2635     }
2636 
2637     /* free workspace */
2638     if (qr_needed) {
2639       if (pcbddc->dbg_flag) {
2640         ierr = PetscFree(work);CHKERRQ(ierr);
2641       }
2642       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2643       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2644       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2645       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2646       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2647     }
2648     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2649     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2651 
2652     /* assembling of global change of variable */
2653     {
2654       Mat      tmat;
2655       PetscInt bs;
2656 
2657       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2658       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2659       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2660       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2661       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2662       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2663       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2664       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2665       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2666       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2667       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2668       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2669       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2670       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2671       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2672       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2673       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2674       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2675     }
2676     /* check */
2677     if (pcbddc->dbg_flag) {
2678       PetscReal error;
2679       Vec       x,x_change;
2680 
2681       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2682       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2683       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2684       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2685       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2686       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2687       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2688       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2689       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2690       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2691       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2692       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2693       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2694       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2695       ierr = VecDestroy(&x);CHKERRQ(ierr);
2696       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2697     }
2698 
2699     /* adapt sub_schurs computed (if any) */
2700     if (pcbddc->use_deluxe_scaling) {
2701       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2702       if (sub_schurs->n_subs_par_g) {
2703         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2704       }
2705       if (sub_schurs->S_Ej_all) {
2706         Mat S_1,S_2,tmat;
2707         IS is_all_N;
2708 
2709         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2710         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2711         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2712         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2713         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2714         sub_schurs->S_Ej_all = S_1;
2715         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2716         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2717         sub_schurs->sum_S_Ej_all = S_2;
2718         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2719       }
2720     }
2721     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2722   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2723     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2724     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2725   }
2726 
2727   /* set up change of basis context */
2728   if (pcbddc->ChangeOfBasisMatrix) {
2729     PCBDDCChange_ctx change_ctx;
2730 
2731     if (!pcbddc->new_global_mat) {
2732       PetscInt global_size,local_size;
2733 
2734       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2735       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2736       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2737       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2738       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2739       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2740       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2741       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2742       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2743     } else {
2744       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2745       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2746       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2747     }
2748     if (!pcbddc->user_ChangeOfBasisMatrix) {
2749       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2750       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2751     } else {
2752       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2753       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2754     }
2755     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2756     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2757     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2758     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2759   }
2760 
2761   /* get indices in local ordering for vertices and constraints */
2762   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2763     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2764     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2765   }
2766   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2767   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2768   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2769   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2770   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2771   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2772   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2773   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2774   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2775   /* set quantities in PCBDDC data struct */
2776   pcbddc->n_actual_vertices = i;
2777   /* check if a new primal space has been introduced */
2778   pcbddc->new_primal_space_local = PETSC_TRUE;
2779   if (olocal_primal_size == pcbddc->local_primal_size) {
2780     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2781     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2782     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2783   }
2784   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2785   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2786 
2787   /* flush dbg viewer */
2788   if (pcbddc->dbg_flag) {
2789     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2790   }
2791 
2792   /* free workspace */
2793   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2794   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2795   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2796   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2797   if (!pcbddc->adaptive_selection) {
2798     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2799     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2800   } else {
2801     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2802                       pcbddc->adaptive_constraints_ptrs,
2803                       pcbddc->adaptive_constraints_idxs,
2804                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2805     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2806   }
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 #undef __FUNCT__
2811 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2812 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2813 {
2814   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2815   PC_IS       *pcis = (PC_IS*)pc->data;
2816   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2817   PetscInt    ierr,i,vertex_size;
2818   PetscViewer viewer=pcbddc->dbg_viewer;
2819 
2820   PetscFunctionBegin;
2821   /* Reset previously computed graph */
2822   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2823   /* Init local Graph struct */
2824   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2825 
2826   /* Check validity of the csr graph passed in by the user */
2827   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2828     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2829   }
2830 
2831   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2832   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2833     PetscInt *xadj,*adjncy;
2834     PetscInt nvtxs;
2835 
2836     if (pcbddc->use_local_adj) {
2837       PetscBool flg_row=PETSC_FALSE;
2838 
2839       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2840       if (flg_row) {
2841         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2842         pcbddc->computed_rowadj = PETSC_TRUE;
2843       }
2844       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2845     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2846       IS                     is_dummy;
2847       ISLocalToGlobalMapping l2gmap_dummy;
2848       PetscInt               j,sum;
2849       PetscInt               *cxadj,*cadjncy;
2850       const PetscInt         *idxs;
2851       PCBDDCGraph            graph;
2852       PetscBT                is_on_boundary;
2853 
2854       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2855       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2856       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2857       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2858       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2859       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2860       graph->xadj = xadj;
2861       graph->adjncy = adjncy;
2862       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2863       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2864 
2865       if (pcbddc->dbg_flag) {
2866         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2867         for (i=0;i<graph->ncc;i++) {
2868           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2869         }
2870         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2871       }
2872 
2873       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2874       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2875       for (i=0;i<pcis->n_B;i++) {
2876         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2877       }
2878       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2879 
2880       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2881       sum = 0;
2882       for (i=0;i<graph->ncc;i++) {
2883         PetscInt sizecc = 0;
2884         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2885           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2886             sizecc++;
2887           }
2888         }
2889         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2890           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2891             cxadj[graph->queue[j]] = sizecc;
2892           }
2893         }
2894         sum += sizecc*sizecc;
2895       }
2896       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2897       sum = 0;
2898       for (i=0;i<nvtxs;i++) {
2899         PetscInt temp = cxadj[i];
2900         cxadj[i] = sum;
2901         sum += temp;
2902       }
2903       cxadj[nvtxs] = sum;
2904       for (i=0;i<graph->ncc;i++) {
2905         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2906           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2907             PetscInt k,sizecc = 0;
2908             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2909               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2910                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2911                 sizecc++;
2912               }
2913             }
2914           }
2915         }
2916       }
2917       if (nvtxs) {
2918         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2919       } else {
2920         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2921         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2922       }
2923       graph->xadj = 0;
2924       graph->adjncy = 0;
2925       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2926       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2927     }
2928   }
2929 
2930   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2931   vertex_size = 1;
2932   if (pcbddc->user_provided_isfordofs) {
2933     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2934       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2935       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2936         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2937         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2938       }
2939       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2940       pcbddc->n_ISForDofs = 0;
2941       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2942     }
2943     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2944     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2945   } else {
2946     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2947       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2948       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2949       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2950         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2951       }
2952     }
2953   }
2954 
2955   /* Setup of Graph */
2956   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2957     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2958   }
2959   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2960     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2961   }
2962   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2963 
2964   /* Graph's connected components analysis */
2965   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2966 
2967   /* print some info to stdout */
2968   if (pcbddc->dbg_flag) {
2969     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2970   }
2971 
2972   /* mark topography has done */
2973   pcbddc->recompute_topography = PETSC_FALSE;
2974   PetscFunctionReturn(0);
2975 }
2976 
2977 #undef __FUNCT__
2978 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2979 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2980 {
2981   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2982   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2983   PetscErrorCode ierr;
2984 
2985   PetscFunctionBegin;
2986   n = 0;
2987   vertices = 0;
2988   if (pcbddc->ConstraintMatrix) {
2989     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2990     for (i=0;i<local_primal_size;i++) {
2991       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2992       if (size_of_constraint == 1) n++;
2993       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2994     }
2995     if (vertices_idx) {
2996       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2997       n = 0;
2998       for (i=0;i<local_primal_size;i++) {
2999         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3000         if (size_of_constraint == 1) {
3001           vertices[n++]=row_cmat_indices[0];
3002         }
3003         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3004       }
3005     }
3006   }
3007   *n_vertices = n;
3008   if (vertices_idx) *vertices_idx = vertices;
3009   PetscFunctionReturn(0);
3010 }
3011 
3012 #undef __FUNCT__
3013 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3014 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3015 {
3016   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3017   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3018   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3019   PetscBT        touched;
3020   PetscErrorCode ierr;
3021 
3022     /* This function assumes that the number of local constraints per connected component
3023        is not greater than the number of nodes defined for the connected component
3024        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3025   PetscFunctionBegin;
3026   n = 0;
3027   constraints_index = 0;
3028   if (pcbddc->ConstraintMatrix) {
3029     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3030     max_size_of_constraint = 0;
3031     for (i=0;i<local_primal_size;i++) {
3032       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3033       if (size_of_constraint > 1) {
3034         n++;
3035       }
3036       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3037       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3038     }
3039     if (constraints_idx) {
3040       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3041       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3042       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3043       n = 0;
3044       for (i=0;i<local_primal_size;i++) {
3045         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3046         if (size_of_constraint > 1) {
3047           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3048           /* find first untouched local node */
3049           j = 0;
3050           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3051           min_index = row_cmat_global_indices[j];
3052           min_loc = j;
3053           /* search the minimum among nodes not yet touched on the connected component
3054              since there can be more than one constraint on a single cc */
3055           for (j=1;j<size_of_constraint;j++) {
3056             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3057               min_index = row_cmat_global_indices[j];
3058               min_loc = j;
3059             }
3060           }
3061           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3062           constraints_index[n++] = row_cmat_indices[min_loc];
3063         }
3064         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3065       }
3066       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3067       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3068     }
3069   }
3070   *n_constraints = n;
3071   if (constraints_idx) *constraints_idx = constraints_index;
3072   PetscFunctionReturn(0);
3073 }
3074 
3075 #undef __FUNCT__
3076 #define __FUNCT__ "PCBDDCSubsetNumbering"
3077 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3078 {
3079   Vec            local_vec,global_vec;
3080   IS             seqis,paris;
3081   VecScatter     scatter_ctx;
3082   PetscScalar    *array;
3083   PetscInt       *temp_global_dofs;
3084   PetscScalar    globalsum;
3085   PetscInt       i,j,s;
3086   PetscInt       nlocals,first_index,old_index,max_local;
3087   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3088   PetscMPIInt    *dof_sizes,*dof_displs;
3089   PetscBool      first_found;
3090   PetscErrorCode ierr;
3091 
3092   PetscFunctionBegin;
3093   /* mpi buffers */
3094   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3095   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3096   j = ( !rank_prec_comm ? size_prec_comm : 0);
3097   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3098   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3099   /* get maximum size of subset */
3100   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3101   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3102   max_local = 0;
3103   for (i=0;i<n_local_dofs;i++) {
3104     if (max_local < temp_global_dofs[i] ) {
3105       max_local = temp_global_dofs[i];
3106     }
3107   }
3108   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3109   max_global++;
3110   max_local = 0;
3111   for (i=0;i<n_local_dofs;i++) {
3112     if (max_local < local_dofs[i] ) {
3113       max_local = local_dofs[i];
3114     }
3115   }
3116   max_local++;
3117   /* allocate workspace */
3118   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3119   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3120   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3121   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3122   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3123   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3124   /* create scatter */
3125   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3126   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3127   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3128   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3129   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3130   /* init array */
3131   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3132   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3133   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3134   if (local_dofs_mult) {
3135     for (i=0;i<n_local_dofs;i++) {
3136       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3137     }
3138   } else {
3139     for (i=0;i<n_local_dofs;i++) {
3140       array[local_dofs[i]]=1.0;
3141     }
3142   }
3143   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3144   /* scatter into global vec and get total number of global dofs */
3145   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3146   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3147   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3148   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3149   /* Fill global_vec with cumulative function for global numbering */
3150   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3151   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3152   nlocals = 0;
3153   first_index = -1;
3154   first_found = PETSC_FALSE;
3155   for (i=0;i<s;i++) {
3156     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3157       first_found = PETSC_TRUE;
3158       first_index = i;
3159     }
3160     nlocals += (PetscInt)PetscRealPart(array[i]);
3161   }
3162   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3163   if (!rank_prec_comm) {
3164     dof_displs[0]=0;
3165     for (i=1;i<size_prec_comm;i++) {
3166       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3167     }
3168   }
3169   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3170   if (first_found) {
3171     array[first_index] += (PetscScalar)nlocals;
3172     old_index = first_index;
3173     for (i=first_index+1;i<s;i++) {
3174       if (PetscRealPart(array[i]) > 0.1) {
3175         array[i] += array[old_index];
3176         old_index = i;
3177       }
3178     }
3179   }
3180   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3181   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3182   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3183   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3184   /* get global ordering of local dofs */
3185   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3186   if (local_dofs_mult) {
3187     for (i=0;i<n_local_dofs;i++) {
3188       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3189     }
3190   } else {
3191     for (i=0;i<n_local_dofs;i++) {
3192       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3193     }
3194   }
3195   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3196   /* free workspace */
3197   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3198   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3199   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3200   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3201   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3202   /* return pointer to global ordering of local dofs */
3203   *global_numbering_subset = temp_global_dofs;
3204   PetscFunctionReturn(0);
3205 }
3206 
3207 #undef __FUNCT__
3208 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3209 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3210 {
3211   PetscInt       i,j;
3212   PetscScalar    *alphas;
3213   PetscErrorCode ierr;
3214 
3215   PetscFunctionBegin;
3216   /* this implements stabilized Gram-Schmidt */
3217   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3218   for (i=0;i<n;i++) {
3219     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3220     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3221     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3222   }
3223   ierr = PetscFree(alphas);CHKERRQ(ierr);
3224   PetscFunctionReturn(0);
3225 }
3226 
3227 #undef __FUNCT__
3228 #define __FUNCT__ "MatISGetSubassemblingPattern"
3229 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3230 {
3231   Mat             subdomain_adj;
3232   IS              new_ranks,ranks_send_to;
3233   MatPartitioning partitioner;
3234   Mat_IS          *matis;
3235   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3236   PetscInt        prank;
3237   PetscMPIInt     size,rank,color;
3238   PetscInt        *xadj,*adjncy,*oldranks;
3239   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3240   PetscInt        i,local_size,threshold=0;
3241   PetscErrorCode  ierr;
3242   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3243   PetscSubcomm    subcomm;
3244 
3245   PetscFunctionBegin;
3246   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3247   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3248   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3249 
3250   /* Get info on mapping */
3251   matis = (Mat_IS*)(mat->data);
3252   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3253   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3254 
3255   /* build local CSR graph of subdomains' connectivity */
3256   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3257   xadj[0] = 0;
3258   xadj[1] = PetscMax(n_neighs-1,0);
3259   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3260   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3261 
3262   if (threshold) {
3263     PetscInt xadj_count = 0;
3264     for (i=1;i<n_neighs;i++) {
3265       if (n_shared[i] > threshold) {
3266         adjncy[xadj_count] = neighs[i];
3267         adjncy_wgt[xadj_count] = n_shared[i];
3268         xadj_count++;
3269       }
3270     }
3271     xadj[1] = xadj_count;
3272   } else {
3273     if (xadj[1]) {
3274       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3275       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3276     }
3277   }
3278   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3279   if (use_square) {
3280     for (i=0;i<xadj[1];i++) {
3281       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3282     }
3283   }
3284   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3285 
3286   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3287 
3288   /*
3289     Restrict work on active processes only.
3290   */
3291   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3292   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3293   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3294   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3295   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3296   if (color) {
3297     ierr = PetscFree(xadj);CHKERRQ(ierr);
3298     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3299     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3300   } else {
3301     PetscInt coarsening_ratio;
3302     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3303     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3304     prank = rank;
3305     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3306     /*
3307     for (i=0;i<size;i++) {
3308       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3309     }
3310     */
3311     for (i=0;i<xadj[1];i++) {
3312       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3313     }
3314     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3315     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3316     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3317 
3318     /* Partition */
3319     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3320     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3321     if (use_vwgt) {
3322       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3323       v_wgt[0] = local_size;
3324       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3325     }
3326     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3327     coarsening_ratio = size/n_subdomains;
3328     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3329     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3330     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3331     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3332 
3333     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3334     if (contiguous) {
3335       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3336     } else {
3337       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3338     }
3339     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3340     /* clean up */
3341     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3342     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3343     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3344     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3345   }
3346   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3347 
3348   /* assemble parallel IS for sends */
3349   i = 1;
3350   if (color) i=0;
3351   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3352 
3353   /* get back IS */
3354   *is_sends = ranks_send_to;
3355   PetscFunctionReturn(0);
3356 }
3357 
3358 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3359 
3360 #undef __FUNCT__
3361 #define __FUNCT__ "MatISSubassemble"
3362 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3363 {
3364   Mat                    local_mat;
3365   Mat_IS                 *matis;
3366   IS                     is_sends_internal;
3367   PetscInt               rows,cols,new_local_rows;
3368   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3369   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3370   ISLocalToGlobalMapping l2gmap;
3371   PetscInt*              l2gmap_indices;
3372   const PetscInt*        is_indices;
3373   MatType                new_local_type;
3374   /* buffers */
3375   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3376   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3377   PetscInt               *recv_buffer_idxs_local;
3378   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3379   /* MPI */
3380   MPI_Comm               comm,comm_n;
3381   PetscSubcomm           subcomm;
3382   PetscMPIInt            n_sends,n_recvs,commsize;
3383   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3384   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3385   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3386   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3387   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3388   PetscErrorCode         ierr;
3389 
3390   PetscFunctionBegin;
3391   /* TODO: add missing checks */
3392   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3393   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3394   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3395   PetscValidLogicalCollectiveInt(mat,nis,7);
3396   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3397   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3398   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3399   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3400   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3401   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3402   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3403   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3404     PetscInt mrows,mcols,mnrows,mncols;
3405     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3406     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3407     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3408     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3409     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3410     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3411   }
3412   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3413   PetscValidLogicalCollectiveInt(mat,bs,0);
3414   /* prepare IS for sending if not provided */
3415   if (!is_sends) {
3416     PetscBool pcontig = PETSC_TRUE;
3417     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3418     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3419   } else {
3420     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3421     is_sends_internal = is_sends;
3422   }
3423 
3424   /* get pointer of MATIS data */
3425   matis = (Mat_IS*)mat->data;
3426 
3427   /* get comm */
3428   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3429 
3430   /* compute number of sends */
3431   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3432   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3433 
3434   /* compute number of receives */
3435   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3436   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3437   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3438   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3439   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3440   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3441   ierr = PetscFree(iflags);CHKERRQ(ierr);
3442 
3443   /* restrict comm if requested */
3444   subcomm = 0;
3445   destroy_mat = PETSC_FALSE;
3446   if (restrict_comm) {
3447     PetscMPIInt color,subcommsize;
3448 
3449     color = 0;
3450     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3451     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3452     subcommsize = commsize - subcommsize;
3453     /* check if reuse has been requested */
3454     if (reuse == MAT_REUSE_MATRIX) {
3455       if (*mat_n) {
3456         PetscMPIInt subcommsize2;
3457         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3458         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3459         comm_n = PetscObjectComm((PetscObject)*mat_n);
3460       } else {
3461         comm_n = PETSC_COMM_SELF;
3462       }
3463     } else { /* MAT_INITIAL_MATRIX */
3464       PetscMPIInt rank;
3465 
3466       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3467       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3468       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3469       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3470       comm_n = subcomm->comm;
3471     }
3472     /* flag to destroy *mat_n if not significative */
3473     if (color) destroy_mat = PETSC_TRUE;
3474   } else {
3475     comm_n = comm;
3476   }
3477 
3478   /* prepare send/receive buffers */
3479   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3480   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3481   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3482   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3483   if (nis) {
3484     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3485   }
3486 
3487   /* Get data from local matrices */
3488   if (!isdense) {
3489     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3490     /* TODO: See below some guidelines on how to prepare the local buffers */
3491     /*
3492        send_buffer_vals should contain the raw values of the local matrix
3493        send_buffer_idxs should contain:
3494        - MatType_PRIVATE type
3495        - PetscInt        size_of_l2gmap
3496        - PetscInt        global_row_indices[size_of_l2gmap]
3497        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3498     */
3499   } else {
3500     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3501     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3502     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3503     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3504     send_buffer_idxs[1] = i;
3505     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3506     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3507     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3508     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3509     for (i=0;i<n_sends;i++) {
3510       ilengths_vals[is_indices[i]] = len*len;
3511       ilengths_idxs[is_indices[i]] = len+2;
3512     }
3513   }
3514   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3515   /* additional is (if any) */
3516   if (nis) {
3517     PetscMPIInt psum;
3518     PetscInt j;
3519     for (j=0,psum=0;j<nis;j++) {
3520       PetscInt plen;
3521       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3522       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3523       psum += len+1; /* indices + lenght */
3524     }
3525     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3526     for (j=0,psum=0;j<nis;j++) {
3527       PetscInt plen;
3528       const PetscInt *is_array_idxs;
3529       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3530       send_buffer_idxs_is[psum] = plen;
3531       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3532       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3533       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3534       psum += plen+1; /* indices + lenght */
3535     }
3536     for (i=0;i<n_sends;i++) {
3537       ilengths_idxs_is[is_indices[i]] = psum;
3538     }
3539     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3540   }
3541 
3542   buf_size_idxs = 0;
3543   buf_size_vals = 0;
3544   buf_size_idxs_is = 0;
3545   for (i=0;i<n_recvs;i++) {
3546     buf_size_idxs += (PetscInt)olengths_idxs[i];
3547     buf_size_vals += (PetscInt)olengths_vals[i];
3548     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3549   }
3550   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3551   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3552   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3553 
3554   /* get new tags for clean communications */
3555   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3556   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3557   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3558 
3559   /* allocate for requests */
3560   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3561   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3562   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3563   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3564   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3565   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3566 
3567   /* communications */
3568   ptr_idxs = recv_buffer_idxs;
3569   ptr_vals = recv_buffer_vals;
3570   ptr_idxs_is = recv_buffer_idxs_is;
3571   for (i=0;i<n_recvs;i++) {
3572     source_dest = onodes[i];
3573     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3574     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3575     ptr_idxs += olengths_idxs[i];
3576     ptr_vals += olengths_vals[i];
3577     if (nis) {
3578       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3579       ptr_idxs_is += olengths_idxs_is[i];
3580     }
3581   }
3582   for (i=0;i<n_sends;i++) {
3583     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3584     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3585     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3586     if (nis) {
3587       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3588     }
3589   }
3590   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3591   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3592 
3593   /* assemble new l2g map */
3594   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3595   ptr_idxs = recv_buffer_idxs;
3596   new_local_rows = 0;
3597   for (i=0;i<n_recvs;i++) {
3598     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3599     ptr_idxs += olengths_idxs[i];
3600   }
3601   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3602   ptr_idxs = recv_buffer_idxs;
3603   new_local_rows = 0;
3604   for (i=0;i<n_recvs;i++) {
3605     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3606     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3607     ptr_idxs += olengths_idxs[i];
3608   }
3609   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3610   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3611   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3612 
3613   /* infer new local matrix type from received local matrices type */
3614   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3615   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3616   if (n_recvs) {
3617     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3618     ptr_idxs = recv_buffer_idxs;
3619     for (i=0;i<n_recvs;i++) {
3620       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3621         new_local_type_private = MATAIJ_PRIVATE;
3622         break;
3623       }
3624       ptr_idxs += olengths_idxs[i];
3625     }
3626     switch (new_local_type_private) {
3627       case MATDENSE_PRIVATE:
3628         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3629           new_local_type = MATSEQAIJ;
3630           bs = 1;
3631         } else { /* if I receive only 1 dense matrix */
3632           new_local_type = MATSEQDENSE;
3633           bs = 1;
3634         }
3635         break;
3636       case MATAIJ_PRIVATE:
3637         new_local_type = MATSEQAIJ;
3638         bs = 1;
3639         break;
3640       case MATBAIJ_PRIVATE:
3641         new_local_type = MATSEQBAIJ;
3642         break;
3643       case MATSBAIJ_PRIVATE:
3644         new_local_type = MATSEQSBAIJ;
3645         break;
3646       default:
3647         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3648         break;
3649     }
3650   } else { /* by default, new_local_type is seqdense */
3651     new_local_type = MATSEQDENSE;
3652     bs = 1;
3653   }
3654 
3655   /* create MATIS object if needed */
3656   if (reuse == MAT_INITIAL_MATRIX) {
3657     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3658     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3659   } else {
3660     /* it also destroys the local matrices */
3661     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3662   }
3663   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3664   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3665 
3666   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3667 
3668   /* Global to local map of received indices */
3669   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3670   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3671   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3672 
3673   /* restore attributes -> type of incoming data and its size */
3674   buf_size_idxs = 0;
3675   for (i=0;i<n_recvs;i++) {
3676     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3677     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3678     buf_size_idxs += (PetscInt)olengths_idxs[i];
3679   }
3680   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3681 
3682   /* set preallocation */
3683   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3684   if (!newisdense) {
3685     PetscInt *new_local_nnz=0;
3686 
3687     ptr_vals = recv_buffer_vals;
3688     ptr_idxs = recv_buffer_idxs_local;
3689     if (n_recvs) {
3690       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3691     }
3692     for (i=0;i<n_recvs;i++) {
3693       PetscInt j;
3694       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3695         for (j=0;j<*(ptr_idxs+1);j++) {
3696           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3697         }
3698       } else {
3699         /* TODO */
3700       }
3701       ptr_idxs += olengths_idxs[i];
3702     }
3703     if (new_local_nnz) {
3704       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3705       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3706       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3707       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3708       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3709       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3710     } else {
3711       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3712     }
3713     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3714   } else {
3715     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3716   }
3717 
3718   /* set values */
3719   ptr_vals = recv_buffer_vals;
3720   ptr_idxs = recv_buffer_idxs_local;
3721   for (i=0;i<n_recvs;i++) {
3722     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3723       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3724       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3725       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3726       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3727       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3728     } else {
3729       /* TODO */
3730     }
3731     ptr_idxs += olengths_idxs[i];
3732     ptr_vals += olengths_vals[i];
3733   }
3734   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3735   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3736   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3737   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3738   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3739   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3740 
3741 #if 0
3742   if (!restrict_comm) { /* check */
3743     Vec       lvec,rvec;
3744     PetscReal infty_error;
3745 
3746     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3747     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3748     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3749     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3750     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3751     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3752     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3753     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3754     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3755   }
3756 #endif
3757 
3758   /* assemble new additional is (if any) */
3759   if (nis) {
3760     PetscInt **temp_idxs,*count_is,j,psum;
3761 
3762     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3763     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3764     ptr_idxs = recv_buffer_idxs_is;
3765     psum = 0;
3766     for (i=0;i<n_recvs;i++) {
3767       for (j=0;j<nis;j++) {
3768         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3769         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3770         psum += plen;
3771         ptr_idxs += plen+1; /* shift pointer to received data */
3772       }
3773     }
3774     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3775     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3776     for (i=1;i<nis;i++) {
3777       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3778     }
3779     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3780     ptr_idxs = recv_buffer_idxs_is;
3781     for (i=0;i<n_recvs;i++) {
3782       for (j=0;j<nis;j++) {
3783         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3784         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3785         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3786         ptr_idxs += plen+1; /* shift pointer to received data */
3787       }
3788     }
3789     for (i=0;i<nis;i++) {
3790       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3791       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3792       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3793     }
3794     ierr = PetscFree(count_is);CHKERRQ(ierr);
3795     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3796     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3797   }
3798   /* free workspace */
3799   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3800   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3801   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3802   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3803   if (isdense) {
3804     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3805     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3806   } else {
3807     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3808   }
3809   if (nis) {
3810     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3811     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3812   }
3813   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3814   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3815   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3816   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3817   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3818   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3819   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3820   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3821   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3822   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3823   ierr = PetscFree(onodes);CHKERRQ(ierr);
3824   if (nis) {
3825     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3826     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3827     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3828   }
3829   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3830   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3831     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3832     for (i=0;i<nis;i++) {
3833       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3834     }
3835   }
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 /* temporary hack into ksp private data structure */
3840 #include <petsc-private/kspimpl.h>
3841 
3842 #undef __FUNCT__
3843 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3844 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3845 {
3846   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3847   PC_IS                  *pcis = (PC_IS*)pc->data;
3848   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3849   MatNullSpace           CoarseNullSpace=NULL;
3850   ISLocalToGlobalMapping coarse_islg;
3851   IS                     coarse_is,*isarray;
3852   PetscInt               i,im_active=-1,active_procs=-1;
3853   PetscInt               nis,nisdofs,nisneu;
3854   PC                     pc_temp;
3855   PCType                 coarse_pc_type;
3856   KSPType                coarse_ksp_type;
3857   PetscBool              multilevel_requested,multilevel_allowed;
3858   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3859   Mat                    t_coarse_mat_is;
3860   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3861   PetscMPIInt            all_procs;
3862   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3863   PetscBool              compute_vecs = PETSC_FALSE;
3864   PetscScalar            *array;
3865   PetscErrorCode         ierr;
3866 
3867   PetscFunctionBegin;
3868   /* Assign global numbering to coarse dofs */
3869   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3870     compute_vecs = PETSC_TRUE;
3871     PetscInt ocoarse_size;
3872     ocoarse_size = pcbddc->coarse_size;
3873     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3874     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3875     /* see if we can avoid some work */
3876     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3877       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3878         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3879         coarse_reuse = PETSC_FALSE;
3880       } else { /* we can safely reuse already computed coarse matrix */
3881         coarse_reuse = PETSC_TRUE;
3882       }
3883     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3884       coarse_reuse = PETSC_FALSE;
3885     }
3886     /* reset any subassembling information */
3887     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3888     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3889   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3890     coarse_reuse = PETSC_TRUE;
3891   }
3892 
3893   /* count "active" (i.e. with positive local size) and "void" processes */
3894   im_active = !!(pcis->n);
3895   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3896   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3897   void_procs = all_procs-active_procs;
3898   csin_type_simple = PETSC_TRUE;
3899   redist = PETSC_FALSE;
3900   if (pcbddc->current_level && void_procs) {
3901     csin_ml = PETSC_TRUE;
3902     ncoarse_ml = void_procs;
3903     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3904     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3905       csin_ds = PETSC_TRUE;
3906       ncoarse_ds = pcbddc->redistribute_coarse;
3907       redist = PETSC_TRUE;
3908     } else {
3909       csin_ds = PETSC_TRUE;
3910       ncoarse_ds = active_procs;
3911       redist = PETSC_TRUE;
3912     }
3913   } else {
3914     csin_ml = PETSC_FALSE;
3915     ncoarse_ml = all_procs;
3916     if (void_procs) {
3917       csin_ds = PETSC_TRUE;
3918       ncoarse_ds = void_procs;
3919       csin_type_simple = PETSC_FALSE;
3920     } else {
3921       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3922         csin_ds = PETSC_TRUE;
3923         ncoarse_ds = pcbddc->redistribute_coarse;
3924         redist = PETSC_TRUE;
3925       } else {
3926         csin_ds = PETSC_FALSE;
3927         ncoarse_ds = all_procs;
3928       }
3929     }
3930   }
3931 
3932   /*
3933     test if we can go multilevel: three conditions must be satisfied:
3934     - we have not exceeded the number of levels requested
3935     - we can actually subassemble the active processes
3936     - we can find a suitable number of MPI processes where we can place the subassembled problem
3937   */
3938   multilevel_allowed = PETSC_FALSE;
3939   multilevel_requested = PETSC_FALSE;
3940   if (pcbddc->current_level < pcbddc->max_levels) {
3941     multilevel_requested = PETSC_TRUE;
3942     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3943       multilevel_allowed = PETSC_FALSE;
3944     } else {
3945       multilevel_allowed = PETSC_TRUE;
3946     }
3947   }
3948   /* determine number of process partecipating to coarse solver */
3949   if (multilevel_allowed) {
3950     ncoarse = ncoarse_ml;
3951     csin = csin_ml;
3952   } else {
3953     ncoarse = ncoarse_ds;
3954     csin = csin_ds;
3955   }
3956 
3957   /* creates temporary l2gmap and IS for coarse indexes */
3958   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3959   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3960 
3961   /* creates temporary MATIS object for coarse matrix */
3962   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3963   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3964   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3965   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3966 #if 0
3967   {
3968     PetscViewer viewer;
3969     char filename[256];
3970     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3971     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3972     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3973     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3974     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3975   }
3976 #endif
3977   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3978   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3979   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3980   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3981   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3982 
3983   /* compute dofs splitting and neumann boundaries for coarse dofs */
3984   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3985     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3986     const PetscInt         *idxs;
3987     ISLocalToGlobalMapping tmap;
3988 
3989     /* create map between primal indices (in local representative ordering) and local primal numbering */
3990     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3991     /* allocate space for temporary storage */
3992     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3993     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3994     /* allocate for IS array */
3995     nisdofs = pcbddc->n_ISForDofsLocal;
3996     nisneu = !!pcbddc->NeumannBoundariesLocal;
3997     nis = nisdofs + nisneu;
3998     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3999     /* dofs splitting */
4000     for (i=0;i<nisdofs;i++) {
4001       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4002       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4003       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4004       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4005       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4006       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4007       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4008       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4009     }
4010     /* neumann boundaries */
4011     if (pcbddc->NeumannBoundariesLocal) {
4012       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4013       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4014       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4015       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4016       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4017       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4018       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4019       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4020     }
4021     /* free memory */
4022     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4023     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4024     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4025   } else {
4026     nis = 0;
4027     nisdofs = 0;
4028     nisneu = 0;
4029     isarray = NULL;
4030   }
4031   /* destroy no longer needed map */
4032   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4033 
4034   /* restrict on coarse candidates (if needed) */
4035   coarse_mat_is = NULL;
4036   if (csin) {
4037     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4038       if (redist) {
4039         PetscMPIInt rank;
4040         PetscInt    spc,n_spc_p1,dest[1],destsize;
4041 
4042         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4043         spc = active_procs/pcbddc->redistribute_coarse;
4044         n_spc_p1 = active_procs%pcbddc->redistribute_coarse;
4045         if (im_active) {
4046           destsize = 1;
4047           if (rank > n_spc_p1*(spc+1)-1) {
4048             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4049           } else {
4050             dest[0] = rank/(spc+1);
4051           }
4052         } else {
4053           destsize = 0;
4054         }
4055         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4056       } else if (csin_type_simple) {
4057         PetscMPIInt rank;
4058         PetscInt    issize,isidx;
4059 
4060         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4061         if (im_active) {
4062           issize = 1;
4063           isidx = (PetscInt)rank;
4064         } else {
4065           issize = 0;
4066           isidx = -1;
4067         }
4068         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4069       } else { /* get a suitable subassembling pattern from MATIS code */
4070         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4071       }
4072 
4073       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4074       if (!redist || ncoarse <= void_procs) {
4075         PetscInt ncoarse_cand,tissize,*nisindices;
4076         PetscInt *coarse_candidates;
4077         const PetscInt* tisindices;
4078 
4079         /* get coarse candidates' ranks in pc communicator */
4080         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4081         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4082         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4083           if (!coarse_candidates[i]) {
4084             coarse_candidates[ncoarse_cand++]=i;
4085           }
4086         }
4087         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4088 
4089 
4090         if (pcbddc->dbg_flag) {
4091           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4092           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4093           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4094           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4095           for (i=0;i<ncoarse_cand;i++) {
4096             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4097           }
4098           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4099           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4100         }
4101         /* shift the pattern on coarse candidates */
4102         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4103         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4104         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4105         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4106         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4107         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4108         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4109       }
4110       if (pcbddc->dbg_flag) {
4111         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4112         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4113         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4114         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4115       }
4116     }
4117     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4118     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4119   } else {
4120     if (pcbddc->dbg_flag) {
4121       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4122       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4123       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4124     }
4125     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4126     coarse_mat_is = t_coarse_mat_is;
4127   }
4128 
4129   /* create local to global scatters for coarse problem */
4130   if (compute_vecs) {
4131     PetscInt lrows;
4132     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4133     if (coarse_mat_is) {
4134       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4135     } else {
4136       lrows = 0;
4137     }
4138     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4139     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4140     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4141     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4142     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4143   }
4144   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4145   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4146 
4147   /* set defaults for coarse KSP and PC */
4148   if (multilevel_allowed) {
4149     coarse_ksp_type = KSPRICHARDSON;
4150     coarse_pc_type = PCBDDC;
4151   } else {
4152     coarse_ksp_type = KSPPREONLY;
4153     coarse_pc_type = PCREDUNDANT;
4154   }
4155 
4156   /* print some info if requested */
4157   if (pcbddc->dbg_flag) {
4158     if (!multilevel_allowed) {
4159       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4160       if (multilevel_requested) {
4161         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4162       } else if (pcbddc->max_levels) {
4163         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4164       }
4165       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4166     }
4167   }
4168 
4169   /* create the coarse KSP object only once with defaults */
4170   if (coarse_mat_is) {
4171     MatReuse coarse_mat_reuse;
4172     PetscViewer dbg_viewer = NULL;
4173     if (pcbddc->dbg_flag) {
4174       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4175       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4176     }
4177     if (!pcbddc->coarse_ksp) {
4178       char prefix[256],str_level[16];
4179       size_t len;
4180       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4181       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4182       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4183       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4184       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4185       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4186       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4187       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4188       /* prefix */
4189       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4190       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4191       if (!pcbddc->current_level) {
4192         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4193         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4194       } else {
4195         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4196         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4197         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4198         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4199         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4200         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4201       }
4202       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4203       /* allow user customization */
4204       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4205       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4206     }
4207 
4208     /* get some info after set from options */
4209     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4210     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4211     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4212     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4213     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4214       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4215       isbddc = PETSC_FALSE;
4216     }
4217     if (isredundant) {
4218       KSP inner_ksp;
4219       PC inner_pc;
4220       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4221       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4222       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4223     }
4224 
4225     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4226     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4227     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4228     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4229     if (nisdofs) {
4230       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4231       for (i=0;i<nisdofs;i++) {
4232         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4233       }
4234     }
4235     if (nisneu) {
4236       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4237       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4238     }
4239 
4240     /* assemble coarse matrix */
4241     if (coarse_reuse) {
4242       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4243       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4244       coarse_mat_reuse = MAT_REUSE_MATRIX;
4245     } else {
4246       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4247     }
4248     if (isbddc || isnn) {
4249       if (pcbddc->coarsening_ratio > 1) {
4250         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4251           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4252           if (pcbddc->dbg_flag) {
4253             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4254             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4255             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4256             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4257           }
4258         }
4259         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4260       } else {
4261         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4262         coarse_mat = coarse_mat_is;
4263       }
4264     } else {
4265       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4266     }
4267     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4268 
4269     /* propagate symmetry info to coarse matrix */
4270     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4271     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4272 
4273     /* set operators */
4274     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4275     if (pcbddc->dbg_flag) {
4276       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4277     }
4278   } else { /* processes non partecipating to coarse solver (if any) */
4279     coarse_mat = 0;
4280   }
4281   ierr = PetscFree(isarray);CHKERRQ(ierr);
4282 #if 0
4283   {
4284     PetscViewer viewer;
4285     char filename[256];
4286     sprintf(filename,"coarse_mat.m");
4287     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4288     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4289     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4290     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4291   }
4292 #endif
4293 
4294   /* Compute coarse null space (special handling by BDDC only) */
4295   if (pcbddc->NullSpace) {
4296     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4297   }
4298 
4299   if (pcbddc->coarse_ksp) {
4300     Vec crhs,csol;
4301     PetscBool ispreonly;
4302     if (CoarseNullSpace) {
4303       if (isbddc) {
4304         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4305       } else {
4306         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4307       }
4308     }
4309     /* setup coarse ksp */
4310     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4311     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4312     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4313     /* hack */
4314     if (!csol) {
4315       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4316     }
4317     if (!crhs) {
4318       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4319     }
4320     /* Check coarse problem if in debug mode or if solving with an iterative method */
4321     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4322     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4323       KSP       check_ksp;
4324       KSPType   check_ksp_type;
4325       PC        check_pc;
4326       Vec       check_vec,coarse_vec;
4327       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4328       PetscInt  its;
4329       PetscBool compute_eigs;
4330       PetscReal *eigs_r,*eigs_c;
4331       PetscInt  neigs;
4332       const char *prefix;
4333 
4334       /* Create ksp object suitable for estimation of extreme eigenvalues */
4335       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4336       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4337       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4338       if (ispreonly) {
4339         check_ksp_type = KSPPREONLY;
4340         compute_eigs = PETSC_FALSE;
4341       } else {
4342         check_ksp_type = KSPGMRES;
4343         compute_eigs = PETSC_TRUE;
4344       }
4345       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4346       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4347       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4348       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4349       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4350       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4351       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4352       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4353       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4354       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4355       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4356       /* create random vec */
4357       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4358       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4359       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4360       if (CoarseNullSpace) {
4361         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4362       }
4363       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4364       /* solve coarse problem */
4365       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4366       if (CoarseNullSpace) {
4367         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4368       }
4369       /* set eigenvalue estimation if preonly has not been requested */
4370       if (compute_eigs) {
4371         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4372         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4373         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4374         lambda_max = eigs_r[neigs-1];
4375         lambda_min = eigs_r[0];
4376         if (pcbddc->use_coarse_estimates) {
4377           if (lambda_max>lambda_min) {
4378             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4379             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4380           }
4381         }
4382       }
4383 
4384       /* check coarse problem residual error */
4385       if (pcbddc->dbg_flag) {
4386         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4387         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4388         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4389         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4390         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4391         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4392         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4393         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4394         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4395         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4396         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4397         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4398         if (compute_eigs) {
4399           PetscReal lambda_max_s,lambda_min_s;
4400           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4401           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4402           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4403           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4404           for (i=0;i<neigs;i++) {
4405             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4406           }
4407         }
4408         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4409         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4410       }
4411       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4412       if (compute_eigs) {
4413         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4414         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4415       }
4416     }
4417   }
4418   /* print additional info */
4419   if (pcbddc->dbg_flag) {
4420     /* waits until all processes reaches this point */
4421     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4422     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4423     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4424   }
4425 
4426   /* free memory */
4427   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4428   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4429   PetscFunctionReturn(0);
4430 }
4431 
4432 #undef __FUNCT__
4433 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4434 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4435 {
4436   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4437   PC_IS*         pcis = (PC_IS*)pc->data;
4438   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4439   PetscInt       i,coarse_size;
4440   PetscInt       *local_primal_indices;
4441   PetscErrorCode ierr;
4442 
4443   PetscFunctionBegin;
4444   /* Compute global number of coarse dofs */
4445   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4446     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4447   }
4448   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4449 
4450   /* check numbering */
4451   if (pcbddc->dbg_flag) {
4452     PetscScalar coarsesum,*array;
4453     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4454 
4455     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4456     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4457     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4458     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4459     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4460     for (i=0;i<pcbddc->local_primal_size;i++) {
4461       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4462     }
4463     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4464     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4465     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4466     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4467     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4468     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4469     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4470     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4471     for (i=0;i<pcis->n;i++) {
4472       if (array[i] == 1.0) {
4473         set_error = PETSC_TRUE;
4474         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4475       }
4476     }
4477     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4478     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4479     for (i=0;i<pcis->n;i++) {
4480       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4481     }
4482     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4483     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4484     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4485     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4486     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4487     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4488     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4489       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4490       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4491       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4492       for (i=0;i<pcbddc->local_primal_size;i++) {
4493         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4494       }
4495       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4496     }
4497     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4498     if (set_error_reduced) {
4499       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4500     }
4501   }
4502   /* get back data */
4503   *coarse_size_n = coarse_size;
4504   *local_primal_indices_n = local_primal_indices;
4505   PetscFunctionReturn(0);
4506 }
4507 
4508 #undef __FUNCT__
4509 #define __FUNCT__ "PCBDDCGlobalToLocal"
4510 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4511 {
4512   IS             localis_t;
4513   PetscInt       i,lsize,*idxs,n;
4514   PetscScalar    *vals;
4515   PetscErrorCode ierr;
4516 
4517   PetscFunctionBegin;
4518   /* get indices in local ordering exploiting local to global map */
4519   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4520   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4521   for (i=0;i<lsize;i++) vals[i] = 1.0;
4522   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4523   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4524   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4525   if (idxs) { /* multilevel guard */
4526     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4527   }
4528   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4529   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4530   ierr = PetscFree(vals);CHKERRQ(ierr);
4531   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4532   /* now compute set in local ordering */
4533   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4534   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4535   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4536   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4537   for (i=0,lsize=0;i<n;i++) {
4538     if (PetscRealPart(vals[i]) > 0.5) {
4539       lsize++;
4540     }
4541   }
4542   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4543   for (i=0,lsize=0;i<n;i++) {
4544     if (PetscRealPart(vals[i]) > 0.5) {
4545       idxs[lsize++] = i;
4546     }
4547   }
4548   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4549   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4550   *localis = localis_t;
4551   PetscFunctionReturn(0);
4552 }
4553 
4554 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4555 #undef __FUNCT__
4556 #define __FUNCT__ "PCBDDCMatMult_Private"
4557 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4558 {
4559   PCBDDCChange_ctx change_ctx;
4560   PetscErrorCode   ierr;
4561 
4562   PetscFunctionBegin;
4563   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4564   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4565   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4566   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4567   PetscFunctionReturn(0);
4568 }
4569 
4570 #undef __FUNCT__
4571 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4572 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4573 {
4574   PCBDDCChange_ctx change_ctx;
4575   PetscErrorCode   ierr;
4576 
4577   PetscFunctionBegin;
4578   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4579   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4580   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4581   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4582   PetscFunctionReturn(0);
4583 }
4584 
4585 #undef __FUNCT__
4586 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4587 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4588 {
4589   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4590   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4591   PetscInt            *used_xadj,*used_adjncy;
4592   PetscBool           free_used_adj;
4593   PetscErrorCode      ierr;
4594 
4595   PetscFunctionBegin;
4596   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4597   free_used_adj = PETSC_FALSE;
4598   if (pcbddc->sub_schurs_layers == -1) {
4599     used_xadj = NULL;
4600     used_adjncy = NULL;
4601   } else {
4602     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4603       used_xadj = pcbddc->mat_graph->xadj;
4604       used_adjncy = pcbddc->mat_graph->adjncy;
4605     } else if (pcbddc->computed_rowadj) {
4606       used_xadj = pcbddc->mat_graph->xadj;
4607       used_adjncy = pcbddc->mat_graph->adjncy;
4608     } else {
4609       PetscBool      flg_row=PETSC_FALSE;
4610       const PetscInt *xadj,*adjncy;
4611       PetscInt       nvtxs;
4612 
4613       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4614       if (flg_row) {
4615         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4616         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4617         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4618         free_used_adj = PETSC_TRUE;
4619       } else {
4620         pcbddc->sub_schurs_layers = -1;
4621         used_xadj = NULL;
4622         used_adjncy = NULL;
4623       }
4624       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4625     }
4626   }
4627   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4628 
4629   /* free adjacency */
4630   if (free_used_adj) {
4631     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4632   }
4633   PetscFunctionReturn(0);
4634 }
4635 
4636 #undef __FUNCT__
4637 #define __FUNCT__ "PCBDDCInitSubSchurs"
4638 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4639 {
4640   PC_IS               *pcis=(PC_IS*)pc->data;
4641   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4642   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4643   PCBDDCGraph         graph;
4644   Mat                 S_j;
4645   PetscErrorCode      ierr;
4646 
4647   PetscFunctionBegin;
4648   /* attach interface graph for determining subsets */
4649   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4650     IS verticesIS;
4651 
4652     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4653     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4654     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4655     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4656     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4657     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4658 /*
4659     if (pcbddc->dbg_flag) {
4660       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4661     }
4662 */
4663   } else {
4664     graph = pcbddc->mat_graph;
4665   }
4666 
4667   /* Create Schur complement matrix */
4668   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4669   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4670 
4671   /* sub_schurs init */
4672   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4673   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4674   /* free graph struct */
4675   if (pcbddc->sub_schurs_rebuild) {
4676     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4677   }
4678   PetscFunctionReturn(0);
4679 }
4680