xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 5a95e1ceb00aaf913daa567a70a604cb4fd63728)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
60   nmax = PetscMax(nmin,nmax);
61 
62   /* allocate lapack workspace */
63   cum = cum2 = 0;
64   maxneigs = 0;
65   for (i=0;i<sub_schurs->n_subs;i++) {
66     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
67       PetscInt n,subset_size;
68       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
69       n = PetscMin(subset_size,nmax);
70       cum += subset_size*n;
71       cum2 += n;
72       maxneigs = PetscMax(maxneigs,n);
73     }
74   }
75 
76   if (mss) {
77     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
78       PetscBLASInt B_itype = 1;
79       PetscBLASInt B_N = mss;
80       PetscReal    zero = 0.0;
81       PetscReal    eps = 0.0; /* dlamch? */
82 
83       B_lwork = -1;
84       S = NULL;
85       St = NULL;
86       eigs = NULL;
87       eigv = NULL;
88       B_iwork = NULL;
89       B_ifail = NULL;
90       thresh = 1.0;
91       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
92 #if defined(PETSC_USE_COMPLEX)
93       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
94 #else
95       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
96 #endif
97       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
98       ierr = PetscFPTrapPop();CHKERRQ(ierr);
99     } else {
100         /* TODO */
101     }
102   } else {
103     lwork = 0;
104   }
105 
106   nv = 0;
107   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */
108     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
109   }
110   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
111   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
112                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
113 #if defined(PETSC_USE_COMPLEX)
114   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
115 #endif
116   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
117   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
118                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
119                       nv+cum,&pcbddc->adaptive_constraints_idxs,
120                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
121   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
122 
123   maxneigs = 0;
124   cum = cum2 = cumarray = 0;
125   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
126     const PetscInt *idxs;
127 
128     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
129     for (cum=0;cum<nv;cum++) {
130       pcbddc->adaptive_constraints_n[cum] = 1;
131       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
132       pcbddc->adaptive_constraints_ptrs[cum] = cum;
133       pcbddc->adaptive_constraints_data[cum] = 1.0;
134     }
135     cum2 = cum;
136     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
137   }
138 
139   if (mss) { /* multilevel */
140     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
141     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
142   }
143 
144   for (i=0;i<sub_schurs->n_subs;i++) {
145     PetscInt j,subset_size;
146 
147     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
148     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
149       const PetscInt *idxs;
150       PetscScalar    one = 1.0,scalar_zero = 0.0;
151       PetscReal      zero=0.0;
152       PetscBLASInt   B_N;
153 
154       /* S should be copied since we need it for deluxe scaling */
155       if (sub_schurs->is_hermitian) {
156         PetscInt j;
157         for (j=0;j<subset_size;j++) {
158           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
159         }
160         for (j=0;j<subset_size;j++) {
161           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
162         }
163       } else {
164         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
165         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
166       }
167       /* is always this the right matrix? */
168       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
169 
170       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
171       /* St = Starray+cumarray; */
172       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
173       if (pcbddc->adaptive_threshold > 1.0) { /* this is an heuristic for edges */
174         thresh = (1.*pcbddc->mat_graph->count[idxs[0]])/(pcbddc->adaptive_threshold);
175       } else {
176         thresh = 1.0;
177       }
178 
179       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
180         PetscBLASInt B_itype = 1;
181         PetscBLASInt B_IL = 1, B_IU;
182         PetscReal    eps = -1.0; /* dlamch? */
183         PetscInt     nmin_s;
184 
185         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
186         /* ask for eigenvalues lower than thresh */
187         if (pcbddc->dbg_flag) {
188           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
189         }
190         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
191 #if defined(PETSC_USE_COMPLEX)
192         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
193 #else
194         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
195 #endif
196         ierr = PetscFPTrapPop();CHKERRQ(ierr);
197         if (B_ierr) {
198           if (B_ierr < 0 ) {
199             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
200           } else if (B_ierr <= B_N) {
201             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
202           } else {
203             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
204           }
205         }
206 
207         if (B_neigs > nmax) {
208           if (pcbddc->dbg_flag) {
209             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
210           }
211           B_neigs = nmax;
212         }
213 
214         nmin_s = PetscMin(nmin,B_N);
215         if (B_neigs < nmin_s) {
216           PetscBLASInt B_neigs2;
217 
218           B_IL = B_neigs + 1;
219           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
220           if (pcbddc->dbg_flag) {
221             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
222           }
223           if (sub_schurs->is_hermitian) {
224             PetscInt j;
225             for (j=0;j<subset_size;j++) {
226               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
227             }
228             for (j=0;j<subset_size;j++) {
229               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
230             }
231           } else {
232             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
233             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
234           }
235           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
236 #if defined(PETSC_USE_COMPLEX)
237           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
238 #else
239           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
240 #endif
241           ierr = PetscFPTrapPop();CHKERRQ(ierr);
242           B_neigs += B_neigs2;
243         }
244         if (B_ierr) {
245           if (B_ierr < 0 ) {
246             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
247           } else if (B_ierr <= B_N) {
248             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
249           } else {
250             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
251           }
252         }
253         if (pcbddc->dbg_flag) {
254           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
255           for (j=0;j<B_neigs;j++) {
256             if (eigs[j] == 0.0) {
257               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
258             } else {
259               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1.0/eigs[j]);
260             }
261           }
262         }
263       } else {
264           /* TODO */
265       }
266 
267       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
268       if (B_neigs) {
269         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N));
270       }
271       maxneigs = PetscMax(B_neigs,maxneigs);
272       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
273       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
274 
275       if (pcbddc->dbg_flag > 1) {
276         PetscInt ii;
277         for (ii=0;ii<B_neigs;ii++) {
278           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
279           for (j=0;j<B_N;j++) {
280             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]);
281           }
282         }
283       }
284       for (j=0;j<B_neigs;j++) {
285 #if 0
286         {
287           PetscBLASInt Blas_N,Blas_one = 1.0;
288           PetscScalar norm;
289           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
290           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
291           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
292             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
293           } else {
294             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
295           }
296           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
297         }
298 #endif
299         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
300         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
301         cum2 += subset_size;
302       }
303       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
304     }
305     /* shift for next computation */
306     cumarray += subset_size*subset_size;
307   }
308   if (pcbddc->dbg_flag) {
309     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
310   }
311   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
312   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
313 
314   if (mss) {
315     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
316     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
317   }
318   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
319 #if defined(PETSC_USE_COMPLEX)
320   ierr = PetscFree(rwork);CHKERRQ(ierr);
321 #endif
322   if (pcbddc->dbg_flag) {
323     PetscInt maxneigs_r;
324     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
325     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
326   }
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "PCBDDCSetUpSolvers"
332 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
333 {
334   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
335   PetscScalar    *coarse_submat_vals;
336   PetscErrorCode ierr;
337 
338   PetscFunctionBegin;
339   /* Setup local scatters R_to_B and (optionally) R_to_D */
340   /* PCBDDCSetUpLocalWorkVectors should be called first! */
341   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
342 
343   /* Setup local neumann solver ksp_R */
344   /* PCBDDCSetUpLocalScatters should be called first! */
345   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
346 
347   /* Change global null space passed in by the user if change of basis has been requested */
348   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
349     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
350   }
351 
352   /*
353      Setup local correction and local part of coarse basis.
354      Gives back the dense local part of the coarse matrix in column major ordering
355   */
356   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
357 
358   /* Compute total number of coarse nodes and setup coarse solver */
359   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
360 
361   /* free */
362   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
363   PetscFunctionReturn(0);
364 }
365 
366 #undef __FUNCT__
367 #define __FUNCT__ "PCBDDCResetCustomization"
368 PetscErrorCode PCBDDCResetCustomization(PC pc)
369 {
370   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
371   PetscErrorCode ierr;
372 
373   PetscFunctionBegin;
374   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
375   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
376   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
377   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
378   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
379   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
380   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
381   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
383   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
384   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "PCBDDCResetTopography"
390 PetscErrorCode PCBDDCResetTopography(PC pc)
391 {
392   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
393   PetscErrorCode ierr;
394 
395   PetscFunctionBegin;
396   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
397   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
398   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
399   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
400   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "PCBDDCResetSolvers"
406 PetscErrorCode PCBDDCResetSolvers(PC pc)
407 {
408   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
409   PetscScalar    *array;
410   PetscErrorCode ierr;
411 
412   PetscFunctionBegin;
413   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
414   if (pcbddc->coarse_phi_B) {
415     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
416     ierr = PetscFree(array);CHKERRQ(ierr);
417   }
418   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
419   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
420   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
421   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
422   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
423   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
424   if (pcbddc->local_auxmat2) {
425     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
426     ierr = PetscFree(array);CHKERRQ(ierr);
427   }
428   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
429   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
430   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
431   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
432   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
433   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
434   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
435   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
436   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
437   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
438   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
439   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
440   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
441   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
442   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
443   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
444   PetscFunctionReturn(0);
445 }
446 
447 #undef __FUNCT__
448 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
449 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
450 {
451   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
452   PC_IS          *pcis = (PC_IS*)pc->data;
453   VecType        impVecType;
454   PetscInt       n_constraints,n_R,old_size;
455   PetscErrorCode ierr;
456 
457   PetscFunctionBegin;
458   if (!pcbddc->ConstraintMatrix) {
459     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
460   }
461   /* get sizes */
462   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
463   n_R = pcis->n-pcbddc->n_actual_vertices;
464   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
465   /* local work vectors (try to avoid unneeded work)*/
466   /* R nodes */
467   old_size = -1;
468   if (pcbddc->vec1_R) {
469     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
470   }
471   if (n_R != old_size) {
472     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
473     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
474     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
475     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
476     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
477     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
478   }
479   /* local primal dofs */
480   old_size = -1;
481   if (pcbddc->vec1_P) {
482     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
483   }
484   if (pcbddc->local_primal_size != old_size) {
485     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
486     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
487     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
488     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
489   }
490   /* local explicit constraints */
491   old_size = -1;
492   if (pcbddc->vec1_C) {
493     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
494   }
495   if (n_constraints && n_constraints != old_size) {
496     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
497     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
498     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
499     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
500   }
501   PetscFunctionReturn(0);
502 }
503 
504 #undef __FUNCT__
505 #define __FUNCT__ "PCBDDCSetUpCorrection"
506 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
507 {
508   PetscErrorCode         ierr;
509   /* pointers to pcis and pcbddc */
510   PC_IS*                 pcis = (PC_IS*)pc->data;
511   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
512   /* submatrices of local problem */
513   Mat                    A_RV,A_VR,A_VV;
514   /* submatrices of local coarse problem */
515   Mat                    S_VV,S_CV,S_VC,S_CC;
516   /* working matrices */
517   Mat                    C_CR;
518   /* additional working stuff */
519   PC                     pc_R;
520   Mat                    F;
521   PetscBool              isLU,isCHOL,isILU;
522 
523   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
524   PetscScalar            *work;
525   PetscInt               *idx_V_B;
526   PetscInt               n,n_vertices,n_constraints;
527   PetscInt               i,n_R,n_D,n_B;
528   PetscBool              unsymmetric_check;
529   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
530   MatType                impMatType;
531   /* some shortcuts to scalars */
532   PetscScalar            one=1.0,m_one=-1.0;
533 
534   PetscFunctionBegin;
535   /* get number of vertices (corners plus constraints with change of basis)
536      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
537   n_vertices = pcbddc->n_actual_vertices;
538   n_constraints = pcbddc->local_primal_size-n_vertices;
539   /* Set Non-overlapping dimensions */
540   n_B = pcis->n_B; n_D = pcis->n - n_B;
541   n_R = pcis->n-n_vertices;
542 
543   /* Set types for local objects needed by BDDC precondtioner */
544   impMatType = MATSEQDENSE;
545 
546   /* vertices in boundary numbering */
547   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
548   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
549   if (i != n_vertices) {
550     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
551   }
552 
553   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
554   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
555   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
556   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
557   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
558   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
559   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
560   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
563 
564   unsymmetric_check = PETSC_FALSE;
565   /* allocate workspace */
566   n = 0;
567   if (n_constraints) {
568     n += n_R*n_constraints;
569   }
570   if (n_vertices) {
571     n = PetscMax(2*n_R*n_vertices,n);
572   }
573   if (!pcbddc->issym) {
574     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
575     unsymmetric_check = PETSC_TRUE;
576   }
577   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
578 
579   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
580   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
581   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
582   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
583   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
584   if (isLU || isILU || isCHOL) {
585     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
586   } else {
587     F = NULL;
588   }
589 
590   /* Precompute stuffs needed for preprocessing and application of BDDC*/
591   if (n_constraints) {
592     Mat M1,M2,M3;
593     IS  is_aux;
594     /* see if we can save some allocations */
595     if (pcbddc->local_auxmat2) {
596       PetscInt on_R,on_constraints;
597       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
598       if (on_R != n_R || on_constraints != n_constraints) {
599         PetscScalar *marray;
600 
601         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
602         ierr = PetscFree(marray);CHKERRQ(ierr);
603         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
604         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
605       }
606     }
607     /* auxiliary matrices */
608     if (!pcbddc->local_auxmat2) {
609       PetscScalar *marray;
610 
611       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
612       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
613       marray += n_R*n_constraints;
614       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
615     }
616 
617     /* Extract constraints on R nodes: C_{CR}  */
618     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
619     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
620     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
621 
622     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
623     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
624     for (i=0;i<n_constraints;i++) {
625       const PetscScalar *row_cmat_values;
626       const PetscInt    *row_cmat_indices;
627       PetscInt          size_of_constraint,j;
628 
629       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
630       for (j=0;j<size_of_constraint;j++) {
631         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
632       }
633       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
634     }
635     if (F) {
636       Mat B;
637 
638       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
639       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
640       ierr = MatDestroy(&B);CHKERRQ(ierr);
641     } else {
642       PetscScalar *xarray;
643       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
644       for (i=0;i<n_constraints;i++) {
645         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
646         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
647         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
648         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
649         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
650       }
651       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
652     }
653 
654     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
655     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
656     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
657     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
658     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
659     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
660     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
661     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
662     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
663     ierr = MatDestroy(&M2);CHKERRQ(ierr);
664     ierr = MatDestroy(&M3);CHKERRQ(ierr);
665     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
666     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
667     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
668     ierr = MatDestroy(&M1);CHKERRQ(ierr);
669   }
670   /* Get submatrices from subdomain matrix */
671   if (n_vertices) {
672     Mat       newmat;
673     IS        is_aux;
674     PetscInt  ibs,mbs;
675     PetscBool issbaij;
676 
677     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
678     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
679     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
680     if (ibs != mbs) { /* need to convert to SEQAIJ */
681       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
682       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
683       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
684       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
685       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
686     } else {
687       /* this is safe */
688       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
689       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
690       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
691         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
692         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
693         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
694         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
695         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
696       } else {
697         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
698         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
699       }
700     }
701     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
702   }
703 
704   /* Matrix of coarse basis functions (local) */
705   if (pcbddc->coarse_phi_B) {
706     PetscInt on_B,on_primal,on_D=n_D;
707     if (pcbddc->coarse_phi_D) {
708       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
709     }
710     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
711     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
712       PetscScalar *marray;
713 
714       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
715       ierr = PetscFree(marray);CHKERRQ(ierr);
716       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
717       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
718       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
719       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
720     }
721   }
722 
723   if (!pcbddc->coarse_phi_B) {
724     PetscScalar *marray;
725 
726     n = n_B*pcbddc->local_primal_size;
727     if (pcbddc->switch_static || pcbddc->dbg_flag) {
728       n += n_D*pcbddc->local_primal_size;
729     }
730     if (!pcbddc->issym) {
731       n *= 2;
732     }
733     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
734     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
735     n = n_B*pcbddc->local_primal_size;
736     if (pcbddc->switch_static || pcbddc->dbg_flag) {
737       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
738       n += n_D*pcbddc->local_primal_size;
739     }
740     if (!pcbddc->issym) {
741       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
742       if (pcbddc->switch_static || pcbddc->dbg_flag) {
743         n = n_B*pcbddc->local_primal_size;
744         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
745       }
746     } else {
747       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
748       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
749       if (pcbddc->switch_static || pcbddc->dbg_flag) {
750         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
751         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
752       }
753     }
754   }
755   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
756   /* vertices */
757   if (n_vertices) {
758 
759     if (n_R) {
760       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
761       PetscBLASInt B_N,B_one = 1;
762       PetscScalar  *x,*y;
763 
764       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
765       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
766       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
767       if (F) {
768         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
769       } else {
770         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
771         for (i=0;i<n_vertices;i++) {
772           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
773           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
774           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
775           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
776           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
777         }
778         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
779       }
780       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
781       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
782       if (n_constraints) {
783         Mat B;
784         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
785         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
786         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
787         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
788         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
789         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
790         ierr = MatDestroy(&B);CHKERRQ(ierr);
791       }
792       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
793       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
794       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
795       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
796       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
797       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
798       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
799       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
800       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
801       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
802       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
803       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
804     } else {
805       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
806       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
807     }
808     /* coarse basis functions */
809     for (i=0;i<n_vertices;i++) {
810       PetscScalar *y;
811 
812       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
813       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
814       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
815       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
816       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
817       y[n_B*i+idx_V_B[i]] = 1.0;
818       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
819       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
820 
821       if (pcbddc->switch_static || pcbddc->dbg_flag) {
822         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
823         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
824         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
827         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
828       }
829       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
830     }
831     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
832     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
833   }
834 
835   if (n_constraints) {
836     Mat B;
837 
838     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
839     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
840     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
841     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
842     if (n_vertices) {
843       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
844     }
845     ierr = MatDestroy(&B);CHKERRQ(ierr);
846     /* coarse basis functions */
847     for (i=0;i<n_constraints;i++) {
848       PetscScalar *y;
849 
850       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
851       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
852       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
853       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
854       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
855       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
856       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
857 
858       if (pcbddc->switch_static || pcbddc->dbg_flag) {
859         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
860         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
861         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
864         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
865       }
866       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
867     }
868   }
869 
870   /* compute other basis functions for non-symmetric problems */
871   if (!pcbddc->issym) {
872     Mat B,X;
873 
874     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
875 
876     if (n_constraints) {
877       Mat S_CCT,B_C;
878 
879       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
880       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
881       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
882       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
883       if (n_vertices) {
884         Mat B_V,S_VCT;
885 
886         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
887         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
888         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
889         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
890         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
891       }
892       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
893     }
894     if (n_vertices && n_R) {
895       Mat          A_VRT;
896       PetscBLASInt B_N,B_one = 1;
897 
898       if (!n_constraints) { /* if there are no constraints, reset work */
899         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
900       }
901       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
902       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
903       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
904       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
905       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
906     }
907 
908     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
909     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
910       for (i=0;i<pcbddc->local_primal_size;i++) {
911         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
912         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
913         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
914         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
915         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
916       }
917     } else {
918       for (i=0;i<pcbddc->local_primal_size;i++) {
919         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
920         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
921         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
922         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
923         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
924       }
925     }
926     ierr = MatDestroy(&B);CHKERRQ(ierr);
927     /* coarse basis functions */
928     for (i=0;i<pcbddc->local_primal_size;i++) {
929       PetscScalar *y;
930 
931       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
932       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
933       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
934       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
935       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
936       if (i<n_vertices) {
937         y[n_B*i+idx_V_B[i]] = 1.0;
938       }
939       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
940       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
941 
942       if (pcbddc->switch_static || pcbddc->dbg_flag) {
943         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
944         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
945         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
948         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
949       }
950       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
951     }
952     ierr = MatDestroy(&X);CHKERRQ(ierr);
953   }
954 
955   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
956   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
957   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
958   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
959   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
960   /* Checking coarse_sub_mat and coarse basis functios */
961   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
962   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
963   if (pcbddc->dbg_flag) {
964     Mat         coarse_sub_mat;
965     Mat         AUXMAT,TM1,TM2,TM3,TM4;
966     Mat         coarse_phi_D,coarse_phi_B;
967     Mat         coarse_psi_D,coarse_psi_B;
968     Mat         A_II,A_BB,A_IB,A_BI;
969     Mat         C_B,CPHI;
970     IS          is_dummy;
971     Vec         mones;
972     MatType     checkmattype=MATSEQAIJ;
973     PetscReal   real_value;
974 
975     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
976     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
977     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
978     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
979     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
980     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
981     if (unsymmetric_check) {
982       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
983       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
984     }
985     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
986 
987     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
988     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
989     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
990     if (unsymmetric_check) {
991       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
992       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
993       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
994       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
995       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
996       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
997       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
998       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
999       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1000       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1001       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1002       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1003     } else {
1004       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1005       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1006       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1007       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1008       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1009       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1010       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1011       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1012     }
1013     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1014     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1015     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1016     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1017     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1018     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1019     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1020     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1021 
1022     /* check constraints */
1023     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1024     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1025     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1026     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1027     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1028     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1029     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1030     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1031     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1032     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1033     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1034     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1035 
1036     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1037     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1038     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1039     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1040     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1041     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1042     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1043     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1044     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1045     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1046     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1047     if (unsymmetric_check) {
1048       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1049       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1050     }
1051     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1052   }
1053 
1054   /* free memory */
1055   ierr = PetscFree(work);CHKERRQ(ierr);
1056   if (n_vertices) {
1057     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1058   }
1059   if (n_constraints) {
1060     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1061   }
1062   /* get back data */
1063   *coarse_submat_vals_n = coarse_submat_vals;
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1069 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1070 {
1071   Mat            *work_mat;
1072   IS             isrow_s,iscol_s;
1073   PetscBool      rsorted,csorted;
1074   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1075   PetscErrorCode ierr;
1076 
1077   PetscFunctionBegin;
1078   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1079   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1080   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1081   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1082 
1083   if (!rsorted) {
1084     const PetscInt *idxs;
1085     PetscInt *idxs_sorted,i;
1086 
1087     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1088     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1089     for (i=0;i<rsize;i++) {
1090       idxs_perm_r[i] = i;
1091     }
1092     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1093     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1094     for (i=0;i<rsize;i++) {
1095       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1096     }
1097     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1098     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1099   } else {
1100     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1101     isrow_s = isrow;
1102   }
1103 
1104   if (!csorted) {
1105     if (isrow == iscol) {
1106       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1107       iscol_s = isrow_s;
1108     } else {
1109       const PetscInt *idxs;
1110       PetscInt *idxs_sorted,i;
1111 
1112       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1113       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1114       for (i=0;i<csize;i++) {
1115         idxs_perm_c[i] = i;
1116       }
1117       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1118       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1119       for (i=0;i<csize;i++) {
1120         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1121       }
1122       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1123       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1124     }
1125   } else {
1126     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1127     iscol_s = iscol;
1128   }
1129 
1130   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1131 
1132   if (!rsorted || !csorted) {
1133     Mat      new_mat;
1134     IS       is_perm_r,is_perm_c;
1135 
1136     if (!rsorted) {
1137       PetscInt *idxs_r,i;
1138       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1139       for (i=0;i<rsize;i++) {
1140         idxs_r[idxs_perm_r[i]] = i;
1141       }
1142       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1143       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1144     } else {
1145       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1146     }
1147     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1148 
1149     if (!csorted) {
1150       if (isrow_s == iscol_s) {
1151         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1152         is_perm_c = is_perm_r;
1153       } else {
1154         PetscInt *idxs_c,i;
1155         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1156         for (i=0;i<csize;i++) {
1157           idxs_c[idxs_perm_c[i]] = i;
1158         }
1159         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1160         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1161       }
1162     } else {
1163       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1164     }
1165     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1166 
1167     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1168     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1169     work_mat[0] = new_mat;
1170     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1171     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1172   }
1173 
1174   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1175   *B = work_mat[0];
1176   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1177   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1178   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1184 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1185 {
1186   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1187   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1188   Mat            new_mat;
1189   IS             is_local,is_global;
1190   PetscInt       local_size;
1191   PetscBool      isseqaij;
1192   PetscErrorCode ierr;
1193 
1194   PetscFunctionBegin;
1195   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1196   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1197   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1198   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1199   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1200   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1201   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1202 
1203   /* check */
1204   if (pcbddc->dbg_flag) {
1205     Vec       x,x_change;
1206     PetscReal error;
1207 
1208     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1209     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1210     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1211     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1212     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1213     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1214     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1216     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1217     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1218     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1219     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1220     ierr = VecDestroy(&x);CHKERRQ(ierr);
1221     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1222   }
1223 
1224   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1225   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1226   if (isseqaij) {
1227     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1228   } else {
1229     Mat work_mat;
1230     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1231     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1232     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1233   }
1234   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1235 #if !defined(PETSC_USE_COMPLEX)
1236   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1237 #endif
1238   /*
1239   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1240   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1241   */
1242   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1248 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1249 {
1250   PC_IS*         pcis = (PC_IS*)(pc->data);
1251   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1252   IS             is_aux1,is_aux2;
1253   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1254   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1255   PetscInt       vbs,bs;
1256   PetscBT        bitmask;
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   /*
1261     No need to setup local scatters if
1262       - primal space is unchanged
1263         AND
1264       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1265         AND
1266       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1267   */
1268   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1269     PetscFunctionReturn(0);
1270   }
1271   /* destroy old objects */
1272   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1273   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1274   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1275   /* Set Non-overlapping dimensions */
1276   n_B = pcis->n_B; n_D = pcis->n - n_B;
1277   n_vertices = pcbddc->n_actual_vertices;
1278   /* create auxiliary bitmask */
1279   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1280   for (i=0;i<n_vertices;i++) {
1281     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1282   }
1283 
1284   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1285   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1286   for (i=0, n_R=0; i<pcis->n; i++) {
1287     if (!PetscBTLookup(bitmask,i)) {
1288       idx_R_local[n_R] = i;
1289       n_R++;
1290     }
1291   }
1292 
1293   /* Block code */
1294   vbs = 1;
1295   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1296   if (bs>1 && !(n_vertices%bs)) {
1297     PetscBool is_blocked = PETSC_TRUE;
1298     PetscInt  *vary;
1299     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1300     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1301     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1302     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1303     for (i=0; i<n_vertices/bs; i++) {
1304       if (vary[i]!=0 && vary[i]!=bs) {
1305         is_blocked = PETSC_FALSE;
1306         break;
1307       }
1308     }
1309     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1310       vbs = bs;
1311       for (i=0;i<n_R/vbs;i++) {
1312         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1313       }
1314     }
1315     ierr = PetscFree(vary);CHKERRQ(ierr);
1316   }
1317   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1318   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1319 
1320   /* print some info if requested */
1321   if (pcbddc->dbg_flag) {
1322     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1323     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1324     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1325     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1326     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1327     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1328     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1329     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1330   }
1331 
1332   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1333   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1334   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1335   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1336   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1337   for (i=0; i<n_D; i++) {
1338     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1339   }
1340   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1341   for (i=0, j=0; i<n_R; i++) {
1342     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1343       aux_array1[j++] = i;
1344     }
1345   }
1346   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1347   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1348   for (i=0, j=0; i<n_B; i++) {
1349     if (!PetscBTLookup(bitmask,is_indices[i])) {
1350       aux_array2[j++] = i;
1351     }
1352   }
1353   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1354   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1355   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1356   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1357   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1358 
1359   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1360     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1361     for (i=0, j=0; i<n_R; i++) {
1362       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1363         aux_array1[j++] = i;
1364       }
1365     }
1366     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1367     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1368     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1369   }
1370   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1371   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1378 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1379 {
1380   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1381   PC_IS          *pcis = (PC_IS*)pc->data;
1382   PC             pc_temp;
1383   Mat            A_RR;
1384   MatReuse       reuse;
1385   PetscScalar    m_one = -1.0;
1386   PetscReal      value;
1387   PetscInt       n_D,n_R,ibs,mbs;
1388   PetscBool      use_exact,use_exact_reduced,issbaij;
1389   PetscErrorCode ierr;
1390   /* prefixes stuff */
1391   char           dir_prefix[256],neu_prefix[256],str_level[16];
1392   size_t         len;
1393 
1394   PetscFunctionBegin;
1395 
1396   /* compute prefixes */
1397   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1398   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1399   if (!pcbddc->current_level) {
1400     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1401     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1402     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1403     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1406     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1407     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1408     len -= 15; /* remove "pc_bddc_coarse_" */
1409     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1410     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1411     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1412     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1413     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1414     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1415     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1416     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1417   }
1418 
1419   /* DIRICHLET PROBLEM */
1420   if (dirichlet) {
1421     if (pcbddc->issym) {
1422       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1423     }
1424     /* Matrix for Dirichlet problem is pcis->A_II */
1425     n_D = pcis->n - pcis->n_B;
1426     if (!pcbddc->ksp_D) { /* create object if not yet build */
1427       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1428       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1429       /* default */
1430       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1431       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1432       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1433       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1434       if (issbaij) {
1435         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1436       } else {
1437         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1438       }
1439       /* Allow user's customization */
1440       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1441       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1442     }
1443     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1444     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1445     if (!n_D) {
1446       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1447       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1448     }
1449     /* Set Up KSP for Dirichlet problem of BDDC */
1450     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1451     /* set ksp_D into pcis data */
1452     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1453     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1454     pcis->ksp_D = pcbddc->ksp_D;
1455   }
1456 
1457   /* NEUMANN PROBLEM */
1458   A_RR = 0;
1459   if (neumann) {
1460     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1461     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1462     if (pcbddc->ksp_R) { /* already created ksp */
1463       PetscInt nn_R;
1464       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1465       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1466       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1467       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1468         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1469         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1470         reuse = MAT_INITIAL_MATRIX;
1471       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1472         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1473           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1474           reuse = MAT_INITIAL_MATRIX;
1475         } else { /* safe to reuse the matrix */
1476           reuse = MAT_REUSE_MATRIX;
1477         }
1478       }
1479       /* last check */
1480       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1481         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1482         reuse = MAT_INITIAL_MATRIX;
1483       }
1484     } else { /* first time, so we need to create the matrix */
1485       reuse = MAT_INITIAL_MATRIX;
1486     }
1487     /* extract A_RR */
1488     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1489     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1490     if (ibs != mbs) {
1491       Mat newmat;
1492       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1493       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1494       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1495     } else {
1496       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1497     }
1498     if (pcbddc->issym) {
1499       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1500     }
1501     if (!pcbddc->ksp_R) { /* create object if not present */
1502       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1503       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1504       /* default */
1505       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1506       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1507       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1508       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1509       if (issbaij) {
1510         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1511       } else {
1512         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1513       }
1514       /* Allow user's customization */
1515       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1516       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1517     }
1518     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1519     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1520     if (!n_R) {
1521       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1522       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1523     }
1524     /* Set Up KSP for Neumann problem of BDDC */
1525     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1526   }
1527 
1528   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1529   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1530     if (pcbddc->dbg_flag) {
1531       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1532       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1533       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1534     }
1535     if (dirichlet) { /* Dirichlet */
1536       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1537       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1538       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1539       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1540       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1541       /* need to be adapted? */
1542       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1543       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1544       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1545       /* print info */
1546       if (pcbddc->dbg_flag) {
1547         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1548         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1549       }
1550       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1551         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1552       }
1553     }
1554     if (neumann) { /* Neumann */
1555       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1556       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1557       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1558       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1559       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1560       /* need to be adapted? */
1561       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1562       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1563       /* print info */
1564       if (pcbddc->dbg_flag) {
1565         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1566         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1567       }
1568       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1569         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1570       }
1571     }
1572   }
1573   /* free Neumann problem's matrix */
1574   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1580 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1581 {
1582   PetscErrorCode ierr;
1583   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1584 
1585   PetscFunctionBegin;
1586   if (applytranspose) {
1587     if (pcbddc->local_auxmat1) {
1588       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1589       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1590     }
1591     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1592   } else {
1593     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1594     if (pcbddc->local_auxmat1) {
1595       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1596       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1597     }
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1603 #undef __FUNCT__
1604 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1605 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1606 {
1607   PetscErrorCode ierr;
1608   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1609   PC_IS*            pcis = (PC_IS*)  (pc->data);
1610   const PetscScalar zero = 0.0;
1611 
1612   PetscFunctionBegin;
1613   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1614   if (applytranspose) {
1615     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1616     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1617   } else {
1618     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1619     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1620   }
1621   /* start communications from local primal nodes to rhs of coarse solver */
1622   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1623   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1624   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625 
1626   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1627   /* TODO remove null space when doing multilevel */
1628   if (pcbddc->coarse_ksp) {
1629     Vec rhs,sol;
1630 
1631     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1632     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1633     if (applytranspose) {
1634       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1635     } else {
1636       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1637     }
1638   }
1639 
1640   /* Local solution on R nodes */
1641   if (pcis->n) {
1642     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1643     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1644     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1645     if (pcbddc->switch_static) {
1646       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1647       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1648     }
1649     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1650     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1651     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1652     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1653     if (pcbddc->switch_static) {
1654       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1655       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1656     }
1657   }
1658 
1659   /* communications from coarse sol to local primal nodes */
1660   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1661   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1662 
1663   /* Sum contributions from two levels */
1664   if (applytranspose) {
1665     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1666     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1667   } else {
1668     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1669     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1670   }
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1676 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1677 {
1678   PetscErrorCode ierr;
1679   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1680   PetscScalar    *array;
1681   Vec            from,to;
1682 
1683   PetscFunctionBegin;
1684   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1685     from = pcbddc->coarse_vec;
1686     to = pcbddc->vec1_P;
1687     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1688       Vec tvec;
1689 
1690       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1691       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1692       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1693       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1694       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1695       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1696     }
1697   } else { /* from local to global -> put data in coarse right hand side */
1698     from = pcbddc->vec1_P;
1699     to = pcbddc->coarse_vec;
1700   }
1701   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1707 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1708 {
1709   PetscErrorCode ierr;
1710   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1711   PetscScalar    *array;
1712   Vec            from,to;
1713 
1714   PetscFunctionBegin;
1715   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1716     from = pcbddc->coarse_vec;
1717     to = pcbddc->vec1_P;
1718   } else { /* from local to global -> put data in coarse right hand side */
1719     from = pcbddc->vec1_P;
1720     to = pcbddc->coarse_vec;
1721   }
1722   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1723   if (smode == SCATTER_FORWARD) {
1724     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1725       Vec tvec;
1726 
1727       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1728       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1729       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1730       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1731     }
1732   } else {
1733     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1734      ierr = VecResetArray(from);CHKERRQ(ierr);
1735     }
1736   }
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 /* uncomment for testing purposes */
1741 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1742 #undef __FUNCT__
1743 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1744 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1745 {
1746   PetscErrorCode    ierr;
1747   PC_IS*            pcis = (PC_IS*)(pc->data);
1748   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1749   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1750   /* one and zero */
1751   PetscScalar       one=1.0,zero=0.0;
1752   /* space to store constraints and their local indices */
1753   PetscScalar       *temp_quadrature_constraint;
1754   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1755   /* iterators */
1756   PetscInt          i,j,k,total_counts,temp_start_ptr;
1757   /* BLAS integers */
1758   PetscBLASInt      lwork,lierr;
1759   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1760   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1761   /* reuse */
1762   PetscInt          olocal_primal_size;
1763   PetscInt          *oprimal_indices_local_idxs;
1764   /* change of basis */
1765   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1766   PetscBool         boolforchange,qr_needed;
1767   PetscBT           touched,change_basis,qr_needed_idx;
1768   /* auxiliary stuff */
1769   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1770   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1771   PetscScalar       *temp_quadrature_constraint_work=NULL;
1772   /* some quantities */
1773   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1774   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1775 
1776   PetscFunctionBegin;
1777   /* Destroy Mat objects computed previously */
1778   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1779   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1780 
1781   /* print some info */
1782   if (pcbddc->dbg_flag) {
1783     IS       vertices;
1784     PetscInt nv,nedges,nfaces;
1785     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1786     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1787     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1788     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1789     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1790     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1791     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1792     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1793     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1794   }
1795 
1796   if (!pcbddc->adaptive_selection) {
1797     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1798     MatNullSpace nearnullsp;
1799     const Vec    *nearnullvecs;
1800     Vec          *localnearnullsp;
1801     PetscScalar  *array;
1802     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1803     PetscBool    nnsp_has_cnst;
1804     /* LAPACK working arrays for SVD or POD */
1805     PetscBool    skip_lapack;
1806     PetscScalar  *work;
1807     PetscReal    *singular_vals;
1808 #if defined(PETSC_USE_COMPLEX)
1809     PetscReal    *rwork;
1810 #endif
1811 #if defined(PETSC_MISSING_LAPACK_GESVD)
1812     PetscScalar  *temp_basis,*correlation_mat;
1813 #else
1814     PetscBLASInt dummy_int=1;
1815     PetscScalar  dummy_scalar=1.;
1816 #endif
1817 
1818     /* Get index sets for faces, edges and vertices from graph */
1819     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1820     /* free unneeded index sets */
1821     if (!pcbddc->use_vertices) {
1822       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1823     }
1824     if (!pcbddc->use_edges) {
1825       for (i=0;i<n_ISForEdges;i++) {
1826         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1827       }
1828       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1829       n_ISForEdges = 0;
1830     }
1831     if (!pcbddc->use_faces) {
1832       for (i=0;i<n_ISForFaces;i++) {
1833         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1834       }
1835       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1836       n_ISForFaces = 0;
1837     }
1838     /* HACKS (the following two blocks of code) */
1839     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1840       pcbddc->use_change_of_basis = PETSC_TRUE;
1841       if (!ISForEdges) {
1842         pcbddc->use_change_on_faces = PETSC_TRUE;
1843       }
1844     }
1845     if (pcbddc->NullSpace) {
1846       /* use_change_of_basis should be consistent among processors */
1847       PetscBool tbool[2],gbool[2];
1848       tbool[0] = pcbddc->use_change_of_basis;
1849       tbool[1] = pcbddc->use_change_on_faces;
1850       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1851       pcbddc->use_change_of_basis = gbool[0];
1852       pcbddc->use_change_on_faces = gbool[1];
1853     }
1854 
1855     /* check if near null space is attached to global mat */
1856     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1857     if (nearnullsp) {
1858       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1859       /* remove any stored info */
1860       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1861       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1862       /* store information for BDDC solver reuse */
1863       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1864       pcbddc->onearnullspace = nearnullsp;
1865       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1866       for (i=0;i<nnsp_size;i++) {
1867         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1868       }
1869     } else { /* if near null space is not provided BDDC uses constants by default */
1870       nnsp_size = 0;
1871       nnsp_has_cnst = PETSC_TRUE;
1872     }
1873     /* get max number of constraints on a single cc */
1874     max_constraints = nnsp_size;
1875     if (nnsp_has_cnst) max_constraints++;
1876 
1877     /*
1878          Evaluate maximum storage size needed by the procedure
1879          - temp_indices will contain start index of each constraint stored as follows
1880          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1881          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1882          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1883                                                                                                                                                            */
1884     total_counts = n_ISForFaces+n_ISForEdges;
1885     total_counts *= max_constraints;
1886     n_vertices = 0;
1887     if (ISForVertices) {
1888       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1889     }
1890     total_counts += n_vertices;
1891     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1892     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1893     total_counts = 0;
1894     max_size_of_constraint = 0;
1895     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1896       if (i<n_ISForEdges) {
1897         used_IS = &ISForEdges[i];
1898       } else {
1899         used_IS = &ISForFaces[i-n_ISForEdges];
1900       }
1901       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1902       total_counts += j;
1903       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1904     }
1905     total_counts *= max_constraints;
1906     total_counts += n_vertices;
1907     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1908     /* get local part of global near null space vectors */
1909     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1910     for (k=0;k<nnsp_size;k++) {
1911       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1912       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1913       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1914     }
1915 
1916     /* whether or not to skip lapack calls */
1917     skip_lapack = PETSC_TRUE;
1918     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1919 
1920     /* allocate some auxiliary stuff */
1921     if (!skip_lapack || pcbddc->use_qr_single) {
1922       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1923     } else {
1924       gidxs = NULL;
1925       permutation = NULL;
1926       temp_indices_to_constraint_work = NULL;
1927       temp_quadrature_constraint_work = NULL;
1928     }
1929 
1930     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1931     if (!skip_lapack) {
1932       PetscScalar temp_work;
1933 
1934 #if defined(PETSC_MISSING_LAPACK_GESVD)
1935       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1936       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1937       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1938       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1939 #if defined(PETSC_USE_COMPLEX)
1940       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1941 #endif
1942       /* now we evaluate the optimal workspace using query with lwork=-1 */
1943       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1944       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1945       lwork = -1;
1946       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1947 #if !defined(PETSC_USE_COMPLEX)
1948       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1949 #else
1950       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1951 #endif
1952       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1953       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1954 #else /* on missing GESVD */
1955       /* SVD */
1956       PetscInt max_n,min_n;
1957       max_n = max_size_of_constraint;
1958       min_n = max_constraints;
1959       if (max_size_of_constraint < max_constraints) {
1960         min_n = max_size_of_constraint;
1961         max_n = max_constraints;
1962       }
1963       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1964 #if defined(PETSC_USE_COMPLEX)
1965       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1966 #endif
1967       /* now we evaluate the optimal workspace using query with lwork=-1 */
1968       lwork = -1;
1969       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1970       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1971       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1972       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1973 #if !defined(PETSC_USE_COMPLEX)
1974       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1975 #else
1976       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1977 #endif
1978       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1979       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1980 #endif /* on missing GESVD */
1981       /* Allocate optimal workspace */
1982       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1983       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1984     }
1985     /* Now we can loop on constraining sets */
1986     total_counts = 0;
1987     temp_indices[0] = 0;
1988     /* vertices */
1989     if (ISForVertices) {
1990       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1991       if (nnsp_has_cnst) { /* consider all vertices */
1992         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1993         for (i=0;i<n_vertices;i++) {
1994           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1995           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1996           total_counts++;
1997         }
1998       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1999         PetscBool used_vertex;
2000         for (i=0;i<n_vertices;i++) {
2001           used_vertex = PETSC_FALSE;
2002           k = 0;
2003           while (!used_vertex && k<nnsp_size) {
2004             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2005             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2006               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
2007               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2008               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2009               total_counts++;
2010               used_vertex = PETSC_TRUE;
2011             }
2012             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2013             k++;
2014           }
2015         }
2016       }
2017       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2018       n_vertices = total_counts;
2019     }
2020 
2021     /* edges and faces */
2022     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2023       if (ncc<n_ISForEdges) {
2024         used_IS = &ISForEdges[ncc];
2025         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2026       } else {
2027         used_IS = &ISForFaces[ncc-n_ISForEdges];
2028         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2029       }
2030       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2031       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2032       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2033       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2034       /* change of basis should not be performed on local periodic nodes */
2035       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2036       if (nnsp_has_cnst) {
2037         PetscScalar quad_value;
2038         temp_constraints++;
2039         if (!pcbddc->use_nnsp_true) {
2040           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2041         } else {
2042           quad_value = 1.0;
2043         }
2044         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2045         for (j=0;j<size_of_constraint;j++) {
2046           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2047         }
2048         /* sort by global ordering if using lapack subroutines (not needed!) */
2049         if (!skip_lapack || pcbddc->use_qr_single) {
2050           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2051           for (j=0;j<size_of_constraint;j++) {
2052             permutation[j]=j;
2053           }
2054           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2055           for (j=0;j<size_of_constraint;j++) {
2056             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2057           }
2058           for (j=0;j<size_of_constraint;j++) {
2059             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2060             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2061           }
2062           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2063           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2064         }
2065         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2066         total_counts++;
2067       }
2068       for (k=0;k<nnsp_size;k++) {
2069         PetscReal real_value;
2070         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2071         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2072         for (j=0;j<size_of_constraint;j++) {
2073           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2074         }
2075         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2076         /* check if array is null on the connected component */
2077         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2078         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2079         if (real_value > 0.0) { /* keep indices and values */
2080           /* sort by global ordering if using lapack subroutines */
2081           if (!skip_lapack || pcbddc->use_qr_single) {
2082             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2083             for (j=0;j<size_of_constraint;j++) {
2084               permutation[j]=j;
2085             }
2086             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2087             for (j=0;j<size_of_constraint;j++) {
2088               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2089               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2090             }
2091             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2092             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2093           }
2094           temp_constraints++;
2095           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2096           total_counts++;
2097         }
2098       }
2099       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2100       valid_constraints = temp_constraints;
2101       if (!pcbddc->use_nnsp_true && temp_constraints) {
2102         if (temp_constraints == 1) { /* just normalize the constraint */
2103           PetscScalar norm;
2104           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2105           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2106           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2107           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2108         } else { /* perform SVD */
2109           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2110 
2111 #if defined(PETSC_MISSING_LAPACK_GESVD)
2112           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2113              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2114              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2115                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2116                 from that computed using LAPACKgesvd
2117              -> This is due to a different computation of eigenvectors in LAPACKheev
2118              -> The quality of the POD-computed basis will be the same */
2119           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2120           /* Store upper triangular part of correlation matrix */
2121           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2122           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2123           for (j=0;j<temp_constraints;j++) {
2124             for (k=0;k<j+1;k++) {
2125               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2126             }
2127           }
2128           /* compute eigenvalues and eigenvectors of correlation matrix */
2129           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2130           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2131 #if !defined(PETSC_USE_COMPLEX)
2132           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2133 #else
2134           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2135 #endif
2136           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2137           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2138           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2139           j = 0;
2140           while (j < temp_constraints && singular_vals[j] < tol) j++;
2141           total_counts = total_counts-j;
2142           valid_constraints = temp_constraints-j;
2143           /* scale and copy POD basis into used quadrature memory */
2144           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2145           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2146           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2147           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2148           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2149           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2150           if (j<temp_constraints) {
2151             PetscInt ii;
2152             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2153             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2154             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2155             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2156             for (k=0;k<temp_constraints-j;k++) {
2157               for (ii=0;ii<size_of_constraint;ii++) {
2158                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2159               }
2160             }
2161           }
2162 #else  /* on missing GESVD */
2163           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2164           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2165           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2166           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2167 #if !defined(PETSC_USE_COMPLEX)
2168           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2169 #else
2170           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2171 #endif
2172           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2173           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2174           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2175           k = temp_constraints;
2176           if (k > size_of_constraint) k = size_of_constraint;
2177           j = 0;
2178           while (j < k && singular_vals[k-j-1] < tol) j++;
2179           valid_constraints = k-j;
2180           total_counts = total_counts-temp_constraints+valid_constraints;
2181 #endif /* on missing GESVD */
2182         }
2183       }
2184       /* setting change_of_basis flag is safe now */
2185       if (boolforchange) {
2186         for (j=0;j<valid_constraints;j++) {
2187           PetscBTSet(change_basis,total_counts-j-1);
2188         }
2189       }
2190     }
2191     /* free workspace */
2192     if (!skip_lapack || pcbddc->use_qr_single) {
2193       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2194     }
2195     if (!skip_lapack) {
2196       ierr = PetscFree(work);CHKERRQ(ierr);
2197 #if defined(PETSC_USE_COMPLEX)
2198       ierr = PetscFree(rwork);CHKERRQ(ierr);
2199 #endif
2200       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2201 #if defined(PETSC_MISSING_LAPACK_GESVD)
2202       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2203       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2204 #endif
2205     }
2206     for (k=0;k<nnsp_size;k++) {
2207       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2208     }
2209     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2210     /* free index sets of faces, edges and vertices */
2211     for (i=0;i<n_ISForFaces;i++) {
2212       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2213     }
2214     if (n_ISForFaces) {
2215       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2216     }
2217     for (i=0;i<n_ISForEdges;i++) {
2218       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2219     }
2220     if (n_ISForEdges) {
2221       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2222     }
2223     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2224   } else {
2225     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2226     PetscInt        cum = 0;
2227 
2228     total_counts = 0;
2229     n_vertices = 0;
2230     if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
2231       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2232     }
2233     max_constraints = 0;
2234     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2235       total_counts += pcbddc->adaptive_constraints_n[i];
2236       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2237     }
2238     temp_indices = pcbddc->adaptive_constraints_ptrs;
2239     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2240     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2241 
2242 #if 0
2243     printf("Found %d totals\n",total_counts);
2244     for (i=0;i<total_counts;i++) {
2245       printf("const %d, start %d",i,temp_indices[i]);
2246       printf(" end %d:\n",temp_indices[i+1]);
2247       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2248         printf("  idxs %d",temp_indices_to_constraint[j]);
2249         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2250       }
2251     }
2252     for (i=0;i<n_vertices;i++) {
2253       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2254     }
2255     for (i=0;i<sub_schurs->n_subs;i++) {
2256       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2257     }
2258 #endif
2259 
2260     max_size_of_constraint = 0;
2261     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2262     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2263     /* Change of basis */
2264     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2265     if (pcbddc->use_change_of_basis) {
2266       cum = n_vertices;
2267       for (i=0;i<sub_schurs->n_subs;i++) {
2268         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2269           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2270             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2271           }
2272         }
2273         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2274       }
2275     }
2276   }
2277 
2278   /* map temp_indices_to_constraint in boundary numbering */
2279   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2280   if (i != temp_indices[total_counts]) {
2281     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2282   }
2283 
2284   /* set quantities in pcbddc data structure and store previous primal size */
2285   /* n_vertices defines the number of subdomain corners in the primal space */
2286   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2287   olocal_primal_size = pcbddc->local_primal_size;
2288   pcbddc->local_primal_size = total_counts;
2289   pcbddc->n_vertices = n_vertices;
2290   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2291 
2292   /* Create constraint matrix */
2293   /* The constraint matrix is used to compute the l2g map of primal dofs */
2294   /* so we need to set it up properly either with or without change of basis */
2295   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2296   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2297   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2298   /* array to compute a local numbering of constraints : vertices first then constraints */
2299   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2300   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2301   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2302   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2303   /* auxiliary stuff for basis change */
2304   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2305   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2306 
2307   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2308   total_primal_vertices=0;
2309   for (i=0;i<pcbddc->local_primal_size;i++) {
2310     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2311     if (size_of_constraint == 1) {
2312       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2313       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2314       aux_primal_minloc[total_primal_vertices]=0;
2315       total_primal_vertices++;
2316     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2317       PetscInt min_loc,min_index;
2318       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2319       /* find first untouched local node */
2320       k = 0;
2321       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2322       min_index = global_indices[k];
2323       min_loc = k;
2324       /* search the minimum among global nodes already untouched on the cc */
2325       for (k=1;k<size_of_constraint;k++) {
2326         /* there can be more than one constraint on a single connected component */
2327         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2328           min_index = global_indices[k];
2329           min_loc = k;
2330         }
2331       }
2332       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2333       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2334       aux_primal_minloc[total_primal_vertices]=min_loc;
2335       total_primal_vertices++;
2336     }
2337   }
2338   /* determine if a QR strategy is needed for change of basis */
2339   qr_needed = PETSC_FALSE;
2340   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2341   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2342     if (PetscBTLookup(change_basis,i)) {
2343       if (!pcbddc->use_qr_single) {
2344         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2345         j = 0;
2346         for (k=0;k<size_of_constraint;k++) {
2347           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2348             j++;
2349           }
2350         }
2351         /* found more than one primal dof on the cc */
2352         if (j > 1) {
2353           PetscBTSet(qr_needed_idx,i);
2354           qr_needed = PETSC_TRUE;
2355         }
2356       } else {
2357         PetscBTSet(qr_needed_idx,i);
2358         qr_needed = PETSC_TRUE;
2359       }
2360     }
2361   }
2362   /* free workspace */
2363   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2364 
2365   /* permute indices in order to have a sorted set of vertices */
2366   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2367 
2368   /* nonzero structure of constraint matrix */
2369   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2370   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2371   j=total_primal_vertices;
2372   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2373     if (!PetscBTLookup(change_basis,i)) {
2374       nnz[j]=temp_indices[i+1]-temp_indices[i];
2375       j++;
2376     }
2377   }
2378   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2379   ierr = PetscFree(nnz);CHKERRQ(ierr);
2380   /* set values in constraint matrix */
2381   for (i=0;i<total_primal_vertices;i++) {
2382     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2383   }
2384   total_counts = total_primal_vertices;
2385   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2386     if (!PetscBTLookup(change_basis,i)) {
2387       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2388       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2389       total_counts++;
2390     }
2391   }
2392   /* assembling */
2393   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2394   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2395   /*
2396   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2397   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2398   */
2399   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2400   if (pcbddc->use_change_of_basis) {
2401     /* dual and primal dofs on a single cc */
2402     PetscInt     dual_dofs,primal_dofs;
2403     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2404     PetscInt     primal_counter;
2405     /* working stuff for GEQRF */
2406     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2407     PetscBLASInt lqr_work;
2408     /* working stuff for UNGQR */
2409     PetscScalar  *gqr_work,lgqr_work_t;
2410     PetscBLASInt lgqr_work;
2411     /* working stuff for TRTRS */
2412     PetscScalar  *trs_rhs;
2413     PetscBLASInt Blas_NRHS;
2414     /* pointers for values insertion into change of basis matrix */
2415     PetscInt     *start_rows,*start_cols;
2416     PetscScalar  *start_vals;
2417     /* working stuff for values insertion */
2418     PetscBT      is_primal;
2419     /* matrix sizes */
2420     PetscInt     global_size,local_size;
2421     /* temporary change of basis */
2422     Mat          localChangeOfBasisMatrix;
2423     /* extra space for debugging */
2424     PetscScalar  *dbg_work;
2425 
2426     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2427     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2428     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2429     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2430     /* nonzeros for local mat */
2431     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2432     for (i=0;i<pcis->n;i++) nnz[i]=1;
2433     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2434       if (PetscBTLookup(change_basis,i)) {
2435         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2436         if (PetscBTLookup(qr_needed_idx,i)) {
2437           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2438         } else {
2439           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2440           /* get local primal index on the cc */
2441           j = 0;
2442           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2443           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2444         }
2445       }
2446     }
2447     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2448     ierr = PetscFree(nnz);CHKERRQ(ierr);
2449     /* Set initial identity in the matrix */
2450     for (i=0;i<pcis->n;i++) {
2451       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2452     }
2453 
2454     if (pcbddc->dbg_flag) {
2455       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2456       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2457     }
2458 
2459 
2460     /* Now we loop on the constraints which need a change of basis */
2461     /*
2462        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2463        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2464 
2465        Basic blocks of change of basis matrix T computed by
2466 
2467           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2468 
2469             | 1        0   ...        0         s_1/S |
2470             | 0        1   ...        0         s_2/S |
2471             |              ...                        |
2472             | 0        ...            1     s_{n-1}/S |
2473             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2474 
2475             with S = \sum_{i=1}^n s_i^2
2476             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2477                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2478 
2479           - QR decomposition of constraints otherwise
2480     */
2481     if (qr_needed) {
2482       /* space to store Q */
2483       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2484       /* first we issue queries for optimal work */
2485       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2486       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2487       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2488       lqr_work = -1;
2489       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2490       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2491       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2492       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2493       lgqr_work = -1;
2494       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2495       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2496       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2497       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2498       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2499       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2500       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2501       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2502       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2503       /* array to store scaling factors for reflectors */
2504       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2505       /* array to store rhs and solution of triangular solver */
2506       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2507       /* allocating workspace for check */
2508       if (pcbddc->dbg_flag) {
2509         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2510       }
2511     }
2512     /* array to store whether a node is primal or not */
2513     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2514     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2515     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2516     if (i != total_primal_vertices) {
2517       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2518     }
2519     for (i=0;i<total_primal_vertices;i++) {
2520       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2521     }
2522     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2523 
2524     /* loop on constraints and see whether or not they need a change of basis and compute it */
2525     /* -> using implicit ordering contained in temp_indices data */
2526     total_counts = pcbddc->n_vertices;
2527     primal_counter = total_counts;
2528     while (total_counts<pcbddc->local_primal_size) {
2529       primal_dofs = 1;
2530       if (PetscBTLookup(change_basis,total_counts)) {
2531         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2532         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2533           primal_dofs++;
2534         }
2535         /* get constraint info */
2536         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2537         dual_dofs = size_of_constraint-primal_dofs;
2538 
2539         if (pcbddc->dbg_flag) {
2540           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2541         }
2542 
2543         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2544 
2545           /* copy quadrature constraints for change of basis check */
2546           if (pcbddc->dbg_flag) {
2547             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2548           }
2549           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2550           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2551 
2552           /* compute QR decomposition of constraints */
2553           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2554           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2555           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2556           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2557           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2558           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2559           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2560 
2561           /* explictly compute R^-T */
2562           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2563           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2564           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2565           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2566           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2567           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2568           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2569           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2570           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2571           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2572 
2573           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2574           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2575           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2576           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2577           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2578           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2579           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2580           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2581           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2582 
2583           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2584              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2585              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2586           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2587           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2588           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2589           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2590           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2591           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2592           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2593           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2594           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2595           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2596 
2597           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2598           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2599           /* insert cols for primal dofs */
2600           for (j=0;j<primal_dofs;j++) {
2601             start_vals = &qr_basis[j*size_of_constraint];
2602             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2603             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2604           }
2605           /* insert cols for dual dofs */
2606           for (j=0,k=0;j<dual_dofs;k++) {
2607             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2608               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2609               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2610               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2611               j++;
2612             }
2613           }
2614 
2615           /* check change of basis */
2616           if (pcbddc->dbg_flag) {
2617             PetscInt   ii,jj;
2618             PetscBool valid_qr=PETSC_TRUE;
2619             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2620             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2621             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2622             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2623             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2624             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2625             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2626             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2627             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2628             for (jj=0;jj<size_of_constraint;jj++) {
2629               for (ii=0;ii<primal_dofs;ii++) {
2630                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2631                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2632               }
2633             }
2634             if (!valid_qr) {
2635               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2636               for (jj=0;jj<size_of_constraint;jj++) {
2637                 for (ii=0;ii<primal_dofs;ii++) {
2638                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2639                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2640                   }
2641                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2642                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2643                   }
2644                 }
2645               }
2646             } else {
2647               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2648             }
2649           }
2650         } else { /* simple transformation block */
2651           PetscInt    row,col;
2652           PetscScalar val,norm;
2653 
2654           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2655           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2656           for (j=0;j<size_of_constraint;j++) {
2657             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2658             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2659             if (!PetscBTLookup(is_primal,row_B)) {
2660               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2661               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2662               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2663             } else {
2664               for (k=0;k<size_of_constraint;k++) {
2665                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2666                 if (row != col) {
2667                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2668                 } else {
2669                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2670                 }
2671                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2672               }
2673             }
2674           }
2675           if (pcbddc->dbg_flag) {
2676             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2677           }
2678         }
2679         /* increment primal counter */
2680         primal_counter += primal_dofs;
2681       } else {
2682         if (pcbddc->dbg_flag) {
2683           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2684         }
2685       }
2686       /* increment constraint counter total_counts */
2687       total_counts += primal_dofs;
2688     }
2689 
2690     /* free workspace */
2691     if (qr_needed) {
2692       if (pcbddc->dbg_flag) {
2693         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2694       }
2695       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2696       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2697       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2698       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2699       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2700     }
2701     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2702     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2703     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2704 
2705     /* assembling of global change of variable */
2706     {
2707       Mat      tmat;
2708       PetscInt bs;
2709 
2710       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2711       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2712       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2713       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2714       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2715       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2716       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2717       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2718       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2719       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2720       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2721       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2722       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2723       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2724       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2725       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2726       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2727       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2728     }
2729     /* check */
2730     if (pcbddc->dbg_flag) {
2731       PetscReal error;
2732       Vec       x,x_change;
2733 
2734       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2735       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2736       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2737       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2738       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2739       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2740       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2741       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2742       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2743       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2744       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2745       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2746       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2747       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2748       ierr = VecDestroy(&x);CHKERRQ(ierr);
2749       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2750     }
2751 
2752     /* adapt sub_schurs computed (if any) */
2753     if (pcbddc->use_deluxe_scaling) {
2754       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2755       if (sub_schurs->S_Ej_all) {
2756         Mat S_1,S_2,tmat;
2757         IS is_all_N;
2758 
2759         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2760         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2761         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2762         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2763         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2764         sub_schurs->S_Ej_all = S_1;
2765         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2766         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2767         sub_schurs->sum_S_Ej_all = S_2;
2768         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2769       }
2770     }
2771     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2772   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2773     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2774     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2775   }
2776 
2777   /* set up change of basis context */
2778   if (pcbddc->ChangeOfBasisMatrix) {
2779     PCBDDCChange_ctx change_ctx;
2780 
2781     if (!pcbddc->new_global_mat) {
2782       PetscInt global_size,local_size;
2783 
2784       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2785       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2786       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2787       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2788       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2789       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2790       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2791       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2792       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2793     } else {
2794       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2795       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2796       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2797     }
2798     if (!pcbddc->user_ChangeOfBasisMatrix) {
2799       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2800       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2801     } else {
2802       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2803       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2804     }
2805     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2806     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2807     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2808     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2809   }
2810 
2811   /* get indices in local ordering for vertices and constraints */
2812   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2813     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2814     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2815   }
2816   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2817   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2818   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2819   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2820   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2821   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2822   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2823   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2824   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2825   /* set quantities in PCBDDC data struct */
2826   pcbddc->n_actual_vertices = i;
2827   /* check if a new primal space has been introduced */
2828   pcbddc->new_primal_space_local = PETSC_TRUE;
2829   if (olocal_primal_size == pcbddc->local_primal_size) {
2830     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2831     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2832     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2833   }
2834   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2835   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2836 
2837   /* flush dbg viewer */
2838   if (pcbddc->dbg_flag) {
2839     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2840   }
2841 
2842   /* free workspace */
2843   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2844   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2845   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2846   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2847   if (!pcbddc->adaptive_selection) {
2848     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2849     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2850   } else {
2851     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2852                       pcbddc->adaptive_constraints_ptrs,
2853                       pcbddc->adaptive_constraints_idxs,
2854                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2855     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2856   }
2857   PetscFunctionReturn(0);
2858 }
2859 
2860 #undef __FUNCT__
2861 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2862 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2863 {
2864   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2865   PC_IS       *pcis = (PC_IS*)pc->data;
2866   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2867   PetscInt    ierr,i,vertex_size;
2868   PetscViewer viewer=pcbddc->dbg_viewer;
2869 
2870   PetscFunctionBegin;
2871   /* Reset previously computed graph */
2872   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2873   /* Init local Graph struct */
2874   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2875 
2876   /* Check validity of the csr graph passed in by the user */
2877   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2878     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2879   }
2880 
2881   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2882   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2883     PetscInt  *xadj,*adjncy;
2884     PetscInt  nvtxs;
2885     PetscBool flg_row=PETSC_FALSE;
2886 
2887     if (pcbddc->use_local_adj) {
2888 
2889       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2890       if (flg_row) {
2891         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2892         pcbddc->computed_rowadj = PETSC_TRUE;
2893       }
2894       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2895     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2896       IS                     is_dummy;
2897       ISLocalToGlobalMapping l2gmap_dummy;
2898       PetscInt               j,sum;
2899       PetscInt               *cxadj,*cadjncy;
2900       const PetscInt         *idxs;
2901       PCBDDCGraph            graph;
2902       PetscBT                is_on_boundary;
2903 
2904       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2905       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2906       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2907       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2908       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2909       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2910       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2911       if (flg_row) {
2912         graph->xadj = xadj;
2913         graph->adjncy = adjncy;
2914       }
2915       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2916       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2917       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2918 
2919       if (pcbddc->dbg_flag) {
2920         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2921         for (i=0;i<graph->ncc;i++) {
2922           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2923         }
2924         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2925       }
2926 
2927       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2928       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2929       for (i=0;i<pcis->n_B;i++) {
2930         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2931       }
2932       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2933 
2934       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2935       sum = 0;
2936       for (i=0;i<graph->ncc;i++) {
2937         PetscInt sizecc = 0;
2938         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2939           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2940             sizecc++;
2941           }
2942         }
2943         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2944           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2945             cxadj[graph->queue[j]] = sizecc;
2946           }
2947         }
2948         sum += sizecc*sizecc;
2949       }
2950       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2951       sum = 0;
2952       for (i=0;i<pcis->n;i++) {
2953         PetscInt temp = cxadj[i];
2954         cxadj[i] = sum;
2955         sum += temp;
2956       }
2957       cxadj[pcis->n] = sum;
2958       for (i=0;i<graph->ncc;i++) {
2959         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2960           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2961             PetscInt k,sizecc = 0;
2962             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2963               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2964                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2965                 sizecc++;
2966               }
2967             }
2968           }
2969         }
2970       }
2971       if (pcis->n) {
2972         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2973       } else {
2974         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2975         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2976       }
2977       graph->xadj = 0;
2978       graph->adjncy = 0;
2979       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2980       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2981     }
2982   }
2983 
2984   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2985   vertex_size = 1;
2986   if (pcbddc->user_provided_isfordofs) {
2987     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2988       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2989       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2990         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2991         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2992       }
2993       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2994       pcbddc->n_ISForDofs = 0;
2995       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2996     }
2997     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2998     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2999   } else {
3000     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3001       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3002       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3003       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3004         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3005       }
3006     }
3007   }
3008 
3009   /* Setup of Graph */
3010   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3011     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3012   }
3013   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3014     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3015   }
3016   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
3017 
3018   /* Graph's connected components analysis */
3019   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3020 
3021   /* print some info to stdout */
3022   if (pcbddc->dbg_flag) {
3023     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
3024   }
3025 
3026   /* mark topography has done */
3027   pcbddc->recompute_topography = PETSC_FALSE;
3028   PetscFunctionReturn(0);
3029 }
3030 
3031 #undef __FUNCT__
3032 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
3033 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
3034 {
3035   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3036   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
3037   PetscErrorCode ierr;
3038 
3039   PetscFunctionBegin;
3040   n = 0;
3041   vertices = 0;
3042   if (pcbddc->ConstraintMatrix) {
3043     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
3044     for (i=0;i<local_primal_size;i++) {
3045       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3046       if (size_of_constraint == 1) n++;
3047       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3048     }
3049     if (vertices_idx) {
3050       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3051       n = 0;
3052       for (i=0;i<local_primal_size;i++) {
3053         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3054         if (size_of_constraint == 1) {
3055           vertices[n++]=row_cmat_indices[0];
3056         }
3057         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3058       }
3059     }
3060   }
3061   *n_vertices = n;
3062   if (vertices_idx) *vertices_idx = vertices;
3063   PetscFunctionReturn(0);
3064 }
3065 
3066 #undef __FUNCT__
3067 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3068 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3069 {
3070   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3071   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3072   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3073   PetscBT        touched;
3074   PetscErrorCode ierr;
3075 
3076     /* This function assumes that the number of local constraints per connected component
3077        is not greater than the number of nodes defined for the connected component
3078        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3079   PetscFunctionBegin;
3080   n = 0;
3081   constraints_index = 0;
3082   if (pcbddc->ConstraintMatrix) {
3083     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3084     max_size_of_constraint = 0;
3085     for (i=0;i<local_primal_size;i++) {
3086       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3087       if (size_of_constraint > 1) {
3088         n++;
3089       }
3090       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3091       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3092     }
3093     if (constraints_idx) {
3094       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3095       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3096       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3097       n = 0;
3098       for (i=0;i<local_primal_size;i++) {
3099         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3100         if (size_of_constraint > 1) {
3101           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3102           /* find first untouched local node */
3103           j = 0;
3104           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3105           min_index = row_cmat_global_indices[j];
3106           min_loc = j;
3107           /* search the minimum among nodes not yet touched on the connected component
3108              since there can be more than one constraint on a single cc */
3109           for (j=1;j<size_of_constraint;j++) {
3110             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3111               min_index = row_cmat_global_indices[j];
3112               min_loc = j;
3113             }
3114           }
3115           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3116           constraints_index[n++] = row_cmat_indices[min_loc];
3117         }
3118         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3119       }
3120       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3121       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3122     }
3123   }
3124   *n_constraints = n;
3125   if (constraints_idx) *constraints_idx = constraints_index;
3126   PetscFunctionReturn(0);
3127 }
3128 
3129 #undef __FUNCT__
3130 #define __FUNCT__ "PCBDDCSubsetNumbering"
3131 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3132 {
3133   Vec            local_vec,global_vec;
3134   IS             seqis,paris;
3135   VecScatter     scatter_ctx;
3136   PetscScalar    *array;
3137   PetscInt       *temp_global_dofs;
3138   PetscScalar    globalsum;
3139   PetscInt       i,j,s;
3140   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3141   PetscMPIInt    rank_prec_comm,size_prec_comm;
3142   PetscInt       *dof_sizes,*dof_displs;
3143   PetscBool      first_found;
3144   PetscErrorCode ierr;
3145 
3146   PetscFunctionBegin;
3147   /* mpi buffers */
3148   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3149   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3150   j = ( !rank_prec_comm ? size_prec_comm : 0);
3151   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3152   /* get maximum size of subset */
3153   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3154   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3155   max_local = 0;
3156   for (i=0;i<n_local_dofs;i++) {
3157     if (max_local < temp_global_dofs[i] ) {
3158       max_local = temp_global_dofs[i];
3159     }
3160   }
3161   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3162   max_global++;
3163   max_local = 0;
3164   for (i=0;i<n_local_dofs;i++) {
3165     if (max_local < local_dofs[i] ) {
3166       max_local = local_dofs[i];
3167     }
3168   }
3169   max_local++;
3170   /* allocate workspace */
3171   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3172   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3173   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3174   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3175   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3176   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3177   /* create scatter */
3178   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3179   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3180   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3181   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3182   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3183   /* init array */
3184   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3185   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3186   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3187   if (local_dofs_mult) {
3188     for (i=0;i<n_local_dofs;i++) {
3189       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3190     }
3191   } else {
3192     for (i=0;i<n_local_dofs;i++) {
3193       array[local_dofs[i]]=1.0;
3194     }
3195   }
3196   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3197   /* scatter into global vec and get total number of global dofs */
3198   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3199   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3200   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3201   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3202   /* Fill global_vec with cumulative function for global numbering */
3203   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3204   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3205   nlocals = 0;
3206   first_index = -1;
3207   first_found = PETSC_FALSE;
3208   for (i=0;i<s;i++) {
3209     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3210       first_found = PETSC_TRUE;
3211       first_index = i;
3212     }
3213     nlocals += (PetscInt)PetscRealPart(array[i]);
3214   }
3215   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3216   if (!rank_prec_comm) {
3217     dof_displs[0]=0;
3218     for (i=1;i<size_prec_comm;i++) {
3219       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3220     }
3221   }
3222   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3223   if (first_found) {
3224     array[first_index] += (PetscScalar)nlocals;
3225     old_index = first_index;
3226     for (i=first_index+1;i<s;i++) {
3227       if (PetscRealPart(array[i]) > 0.1) {
3228         array[i] += array[old_index];
3229         old_index = i;
3230       }
3231     }
3232   }
3233   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3234   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3235   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3236   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3237   /* get global ordering of local dofs */
3238   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3239   if (local_dofs_mult) {
3240     for (i=0;i<n_local_dofs;i++) {
3241       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3242     }
3243   } else {
3244     for (i=0;i<n_local_dofs;i++) {
3245       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3246     }
3247   }
3248   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3249   /* free workspace */
3250   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3251   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3252   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3253   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3254   /* return pointer to global ordering of local dofs */
3255   *global_numbering_subset = temp_global_dofs;
3256   PetscFunctionReturn(0);
3257 }
3258 
3259 #undef __FUNCT__
3260 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3261 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3262 {
3263   PetscInt       i,j;
3264   PetscScalar    *alphas;
3265   PetscErrorCode ierr;
3266 
3267   PetscFunctionBegin;
3268   /* this implements stabilized Gram-Schmidt */
3269   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3270   for (i=0;i<n;i++) {
3271     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3272     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3273     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3274   }
3275   ierr = PetscFree(alphas);CHKERRQ(ierr);
3276   PetscFunctionReturn(0);
3277 }
3278 
3279 #undef __FUNCT__
3280 #define __FUNCT__ "MatISGetSubassemblingPattern"
3281 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3282 {
3283   Mat             subdomain_adj;
3284   IS              new_ranks,ranks_send_to;
3285   MatPartitioning partitioner;
3286   Mat_IS          *matis;
3287   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3288   PetscInt        prank;
3289   PetscMPIInt     size,rank,color;
3290   PetscInt        *xadj,*adjncy,*oldranks;
3291   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3292   PetscInt        i,local_size,threshold=0;
3293   PetscErrorCode  ierr;
3294   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3295   PetscSubcomm    subcomm;
3296 
3297   PetscFunctionBegin;
3298   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3299   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3300   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3301 
3302   /* Get info on mapping */
3303   matis = (Mat_IS*)(mat->data);
3304   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3305   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3306 
3307   /* build local CSR graph of subdomains' connectivity */
3308   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3309   xadj[0] = 0;
3310   xadj[1] = PetscMax(n_neighs-1,0);
3311   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3312   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3313 
3314   if (threshold) {
3315     PetscInt xadj_count = 0;
3316     for (i=1;i<n_neighs;i++) {
3317       if (n_shared[i] > threshold) {
3318         adjncy[xadj_count] = neighs[i];
3319         adjncy_wgt[xadj_count] = n_shared[i];
3320         xadj_count++;
3321       }
3322     }
3323     xadj[1] = xadj_count;
3324   } else {
3325     if (xadj[1]) {
3326       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3327       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3328     }
3329   }
3330   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3331   if (use_square) {
3332     for (i=0;i<xadj[1];i++) {
3333       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3334     }
3335   }
3336   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3337 
3338   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3339 
3340   /*
3341     Restrict work on active processes only.
3342   */
3343   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3344   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3345   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3346   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3347   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3348   if (color) {
3349     ierr = PetscFree(xadj);CHKERRQ(ierr);
3350     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3351     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3352   } else {
3353     PetscInt coarsening_ratio;
3354     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3355     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3356     prank = rank;
3357     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3358     /*
3359     for (i=0;i<size;i++) {
3360       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3361     }
3362     */
3363     for (i=0;i<xadj[1];i++) {
3364       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3365     }
3366     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3367     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3368     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3369 
3370     /* Partition */
3371     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3372     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3373     if (use_vwgt) {
3374       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3375       v_wgt[0] = local_size;
3376       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3377     }
3378     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3379     coarsening_ratio = size/n_subdomains;
3380     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3381     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3382     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3383     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3384 
3385     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3386     if (contiguous) {
3387       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3388     } else {
3389       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3390     }
3391     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3392     /* clean up */
3393     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3394     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3395     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3396     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3397   }
3398   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3399 
3400   /* assemble parallel IS for sends */
3401   i = 1;
3402   if (color) i=0;
3403   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3404 
3405   /* get back IS */
3406   *is_sends = ranks_send_to;
3407   PetscFunctionReturn(0);
3408 }
3409 
3410 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3411 
3412 #undef __FUNCT__
3413 #define __FUNCT__ "MatISSubassemble"
3414 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3415 {
3416   Mat                    local_mat;
3417   Mat_IS                 *matis;
3418   IS                     is_sends_internal;
3419   PetscInt               rows,cols,new_local_rows;
3420   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3421   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3422   ISLocalToGlobalMapping l2gmap;
3423   PetscInt*              l2gmap_indices;
3424   const PetscInt*        is_indices;
3425   MatType                new_local_type;
3426   /* buffers */
3427   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3428   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3429   PetscInt               *recv_buffer_idxs_local;
3430   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3431   /* MPI */
3432   MPI_Comm               comm,comm_n;
3433   PetscSubcomm           subcomm;
3434   PetscMPIInt            n_sends,n_recvs,commsize;
3435   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3436   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3437   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3438   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3439   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3440   PetscErrorCode         ierr;
3441 
3442   PetscFunctionBegin;
3443   /* TODO: add missing checks */
3444   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3445   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3446   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3447   PetscValidLogicalCollectiveInt(mat,nis,7);
3448   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3449   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3450   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3451   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3452   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3453   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3454   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3455   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3456     PetscInt mrows,mcols,mnrows,mncols;
3457     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3458     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3459     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3460     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3461     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3462     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3463   }
3464   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3465   PetscValidLogicalCollectiveInt(mat,bs,0);
3466   /* prepare IS for sending if not provided */
3467   if (!is_sends) {
3468     PetscBool pcontig = PETSC_TRUE;
3469     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3470     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3471   } else {
3472     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3473     is_sends_internal = is_sends;
3474   }
3475 
3476   /* get pointer of MATIS data */
3477   matis = (Mat_IS*)mat->data;
3478 
3479   /* get comm */
3480   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3481 
3482   /* compute number of sends */
3483   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3484   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3485 
3486   /* compute number of receives */
3487   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3488   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3489   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3490   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3491   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3492   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3493   ierr = PetscFree(iflags);CHKERRQ(ierr);
3494 
3495   /* restrict comm if requested */
3496   subcomm = 0;
3497   destroy_mat = PETSC_FALSE;
3498   if (restrict_comm) {
3499     PetscMPIInt color,subcommsize;
3500 
3501     color = 0;
3502     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3503     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3504     subcommsize = commsize - subcommsize;
3505     /* check if reuse has been requested */
3506     if (reuse == MAT_REUSE_MATRIX) {
3507       if (*mat_n) {
3508         PetscMPIInt subcommsize2;
3509         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3510         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3511         comm_n = PetscObjectComm((PetscObject)*mat_n);
3512       } else {
3513         comm_n = PETSC_COMM_SELF;
3514       }
3515     } else { /* MAT_INITIAL_MATRIX */
3516       PetscMPIInt rank;
3517 
3518       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3519       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3520       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3521       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3522       comm_n = PetscSubcommChild(subcomm);
3523     }
3524     /* flag to destroy *mat_n if not significative */
3525     if (color) destroy_mat = PETSC_TRUE;
3526   } else {
3527     comm_n = comm;
3528   }
3529 
3530   /* prepare send/receive buffers */
3531   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3532   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3533   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3534   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3535   if (nis) {
3536     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3537   }
3538 
3539   /* Get data from local matrices */
3540   if (!isdense) {
3541     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3542     /* TODO: See below some guidelines on how to prepare the local buffers */
3543     /*
3544        send_buffer_vals should contain the raw values of the local matrix
3545        send_buffer_idxs should contain:
3546        - MatType_PRIVATE type
3547        - PetscInt        size_of_l2gmap
3548        - PetscInt        global_row_indices[size_of_l2gmap]
3549        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3550     */
3551   } else {
3552     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3553     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3554     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3555     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3556     send_buffer_idxs[1] = i;
3557     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3558     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3559     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3560     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3561     for (i=0;i<n_sends;i++) {
3562       ilengths_vals[is_indices[i]] = len*len;
3563       ilengths_idxs[is_indices[i]] = len+2;
3564     }
3565   }
3566   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3567   /* additional is (if any) */
3568   if (nis) {
3569     PetscMPIInt psum;
3570     PetscInt j;
3571     for (j=0,psum=0;j<nis;j++) {
3572       PetscInt plen;
3573       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3574       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3575       psum += len+1; /* indices + lenght */
3576     }
3577     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3578     for (j=0,psum=0;j<nis;j++) {
3579       PetscInt plen;
3580       const PetscInt *is_array_idxs;
3581       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3582       send_buffer_idxs_is[psum] = plen;
3583       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3584       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3585       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3586       psum += plen+1; /* indices + lenght */
3587     }
3588     for (i=0;i<n_sends;i++) {
3589       ilengths_idxs_is[is_indices[i]] = psum;
3590     }
3591     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3592   }
3593 
3594   buf_size_idxs = 0;
3595   buf_size_vals = 0;
3596   buf_size_idxs_is = 0;
3597   for (i=0;i<n_recvs;i++) {
3598     buf_size_idxs += (PetscInt)olengths_idxs[i];
3599     buf_size_vals += (PetscInt)olengths_vals[i];
3600     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3601   }
3602   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3603   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3604   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3605 
3606   /* get new tags for clean communications */
3607   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3608   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3609   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3610 
3611   /* allocate for requests */
3612   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3613   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3614   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3615   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3616   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3617   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3618 
3619   /* communications */
3620   ptr_idxs = recv_buffer_idxs;
3621   ptr_vals = recv_buffer_vals;
3622   ptr_idxs_is = recv_buffer_idxs_is;
3623   for (i=0;i<n_recvs;i++) {
3624     source_dest = onodes[i];
3625     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3626     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3627     ptr_idxs += olengths_idxs[i];
3628     ptr_vals += olengths_vals[i];
3629     if (nis) {
3630       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3631       ptr_idxs_is += olengths_idxs_is[i];
3632     }
3633   }
3634   for (i=0;i<n_sends;i++) {
3635     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3636     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3637     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3638     if (nis) {
3639       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3640     }
3641   }
3642   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3643   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3644 
3645   /* assemble new l2g map */
3646   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3647   ptr_idxs = recv_buffer_idxs;
3648   new_local_rows = 0;
3649   for (i=0;i<n_recvs;i++) {
3650     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3651     ptr_idxs += olengths_idxs[i];
3652   }
3653   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3654   ptr_idxs = recv_buffer_idxs;
3655   new_local_rows = 0;
3656   for (i=0;i<n_recvs;i++) {
3657     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3658     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3659     ptr_idxs += olengths_idxs[i];
3660   }
3661   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3662   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3663   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3664 
3665   /* infer new local matrix type from received local matrices type */
3666   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3667   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3668   if (n_recvs) {
3669     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3670     ptr_idxs = recv_buffer_idxs;
3671     for (i=0;i<n_recvs;i++) {
3672       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3673         new_local_type_private = MATAIJ_PRIVATE;
3674         break;
3675       }
3676       ptr_idxs += olengths_idxs[i];
3677     }
3678     switch (new_local_type_private) {
3679       case MATDENSE_PRIVATE:
3680         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3681           new_local_type = MATSEQAIJ;
3682           bs = 1;
3683         } else { /* if I receive only 1 dense matrix */
3684           new_local_type = MATSEQDENSE;
3685           bs = 1;
3686         }
3687         break;
3688       case MATAIJ_PRIVATE:
3689         new_local_type = MATSEQAIJ;
3690         bs = 1;
3691         break;
3692       case MATBAIJ_PRIVATE:
3693         new_local_type = MATSEQBAIJ;
3694         break;
3695       case MATSBAIJ_PRIVATE:
3696         new_local_type = MATSEQSBAIJ;
3697         break;
3698       default:
3699         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3700         break;
3701     }
3702   } else { /* by default, new_local_type is seqdense */
3703     new_local_type = MATSEQDENSE;
3704     bs = 1;
3705   }
3706 
3707   /* create MATIS object if needed */
3708   if (reuse == MAT_INITIAL_MATRIX) {
3709     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3710     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3711   } else {
3712     /* it also destroys the local matrices */
3713     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3714   }
3715   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3716   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3717 
3718   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3719 
3720   /* Global to local map of received indices */
3721   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3722   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3723   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3724 
3725   /* restore attributes -> type of incoming data and its size */
3726   buf_size_idxs = 0;
3727   for (i=0;i<n_recvs;i++) {
3728     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3729     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3730     buf_size_idxs += (PetscInt)olengths_idxs[i];
3731   }
3732   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3733 
3734   /* set preallocation */
3735   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3736   if (!newisdense) {
3737     PetscInt *new_local_nnz=0;
3738 
3739     ptr_vals = recv_buffer_vals;
3740     ptr_idxs = recv_buffer_idxs_local;
3741     if (n_recvs) {
3742       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3743     }
3744     for (i=0;i<n_recvs;i++) {
3745       PetscInt j;
3746       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3747         for (j=0;j<*(ptr_idxs+1);j++) {
3748           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3749         }
3750       } else {
3751         /* TODO */
3752       }
3753       ptr_idxs += olengths_idxs[i];
3754     }
3755     if (new_local_nnz) {
3756       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3757       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3758       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3759       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3760       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3761       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3762     } else {
3763       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3764     }
3765     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3766   } else {
3767     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3768   }
3769 
3770   /* set values */
3771   ptr_vals = recv_buffer_vals;
3772   ptr_idxs = recv_buffer_idxs_local;
3773   for (i=0;i<n_recvs;i++) {
3774     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3775       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3776       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3777       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3778       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3779       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3780     } else {
3781       /* TODO */
3782     }
3783     ptr_idxs += olengths_idxs[i];
3784     ptr_vals += olengths_vals[i];
3785   }
3786   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3787   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3788   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3789   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3790   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3791   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3792 
3793 #if 0
3794   if (!restrict_comm) { /* check */
3795     Vec       lvec,rvec;
3796     PetscReal infty_error;
3797 
3798     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3799     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3800     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3801     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3802     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3803     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3804     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3805     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3806     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3807   }
3808 #endif
3809 
3810   /* assemble new additional is (if any) */
3811   if (nis) {
3812     PetscInt **temp_idxs,*count_is,j,psum;
3813 
3814     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3815     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3816     ptr_idxs = recv_buffer_idxs_is;
3817     psum = 0;
3818     for (i=0;i<n_recvs;i++) {
3819       for (j=0;j<nis;j++) {
3820         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3821         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3822         psum += plen;
3823         ptr_idxs += plen+1; /* shift pointer to received data */
3824       }
3825     }
3826     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3827     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3828     for (i=1;i<nis;i++) {
3829       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3830     }
3831     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3832     ptr_idxs = recv_buffer_idxs_is;
3833     for (i=0;i<n_recvs;i++) {
3834       for (j=0;j<nis;j++) {
3835         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3836         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3837         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3838         ptr_idxs += plen+1; /* shift pointer to received data */
3839       }
3840     }
3841     for (i=0;i<nis;i++) {
3842       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3843       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3844       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3845     }
3846     ierr = PetscFree(count_is);CHKERRQ(ierr);
3847     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3848     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3849   }
3850   /* free workspace */
3851   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3852   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3853   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3854   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3855   if (isdense) {
3856     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3857     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3858   } else {
3859     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3860   }
3861   if (nis) {
3862     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3863     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3864   }
3865   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3866   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3867   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3868   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3869   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3870   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3871   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3872   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3873   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3874   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3875   ierr = PetscFree(onodes);CHKERRQ(ierr);
3876   if (nis) {
3877     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3878     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3879     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3880   }
3881   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3882   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3883     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3884     for (i=0;i<nis;i++) {
3885       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3886     }
3887   }
3888   PetscFunctionReturn(0);
3889 }
3890 
3891 /* temporary hack into ksp private data structure */
3892 #include <petsc-private/kspimpl.h>
3893 
3894 #undef __FUNCT__
3895 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3896 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3897 {
3898   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3899   PC_IS                  *pcis = (PC_IS*)pc->data;
3900   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3901   MatNullSpace           CoarseNullSpace=NULL;
3902   ISLocalToGlobalMapping coarse_islg;
3903   IS                     coarse_is,*isarray;
3904   PetscInt               i,im_active=-1,active_procs=-1;
3905   PetscInt               nis,nisdofs,nisneu;
3906   PC                     pc_temp;
3907   PCType                 coarse_pc_type;
3908   KSPType                coarse_ksp_type;
3909   PetscBool              multilevel_requested,multilevel_allowed;
3910   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3911   Mat                    t_coarse_mat_is;
3912   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3913   PetscMPIInt            all_procs;
3914   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3915   PetscBool              compute_vecs = PETSC_FALSE;
3916   PetscScalar            *array;
3917   PetscErrorCode         ierr;
3918 
3919   PetscFunctionBegin;
3920   /* Assign global numbering to coarse dofs */
3921   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3922     PetscInt ocoarse_size;
3923     compute_vecs = PETSC_TRUE;
3924     ocoarse_size = pcbddc->coarse_size;
3925     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3926     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3927     /* see if we can avoid some work */
3928     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3929       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3930         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3931         coarse_reuse = PETSC_FALSE;
3932       } else { /* we can safely reuse already computed coarse matrix */
3933         coarse_reuse = PETSC_TRUE;
3934       }
3935     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3936       coarse_reuse = PETSC_FALSE;
3937     }
3938     /* reset any subassembling information */
3939     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3940     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3941   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3942     coarse_reuse = PETSC_TRUE;
3943   }
3944 
3945   /* count "active" (i.e. with positive local size) and "void" processes */
3946   im_active = !!(pcis->n);
3947   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3948   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3949   void_procs = all_procs-active_procs;
3950   csin_type_simple = PETSC_TRUE;
3951   redist = PETSC_FALSE;
3952   if (pcbddc->current_level && void_procs) {
3953     csin_ml = PETSC_TRUE;
3954     ncoarse_ml = void_procs;
3955     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3956     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3957       csin_ds = PETSC_TRUE;
3958       ncoarse_ds = pcbddc->redistribute_coarse;
3959       redist = PETSC_TRUE;
3960     } else {
3961       csin_ds = PETSC_TRUE;
3962       ncoarse_ds = active_procs;
3963       redist = PETSC_TRUE;
3964     }
3965   } else {
3966     csin_ml = PETSC_FALSE;
3967     ncoarse_ml = all_procs;
3968     if (void_procs) {
3969       csin_ds = PETSC_TRUE;
3970       ncoarse_ds = void_procs;
3971       csin_type_simple = PETSC_FALSE;
3972     } else {
3973       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3974         csin_ds = PETSC_TRUE;
3975         ncoarse_ds = pcbddc->redistribute_coarse;
3976         redist = PETSC_TRUE;
3977       } else {
3978         csin_ds = PETSC_FALSE;
3979         ncoarse_ds = all_procs;
3980       }
3981     }
3982   }
3983 
3984   /*
3985     test if we can go multilevel: three conditions must be satisfied:
3986     - we have not exceeded the number of levels requested
3987     - we can actually subassemble the active processes
3988     - we can find a suitable number of MPI processes where we can place the subassembled problem
3989   */
3990   multilevel_allowed = PETSC_FALSE;
3991   multilevel_requested = PETSC_FALSE;
3992   if (pcbddc->current_level < pcbddc->max_levels) {
3993     multilevel_requested = PETSC_TRUE;
3994     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3995       multilevel_allowed = PETSC_FALSE;
3996     } else {
3997       multilevel_allowed = PETSC_TRUE;
3998     }
3999   }
4000   /* determine number of process partecipating to coarse solver */
4001   if (multilevel_allowed) {
4002     ncoarse = ncoarse_ml;
4003     csin = csin_ml;
4004     redist = PETSC_FALSE;
4005   } else {
4006     ncoarse = ncoarse_ds;
4007     csin = csin_ds;
4008   }
4009 
4010   /* creates temporary l2gmap and IS for coarse indexes */
4011   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4012   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4013 
4014   /* creates temporary MATIS object for coarse matrix */
4015   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4016   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4017   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4018   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4019 #if 0
4020   {
4021     PetscViewer viewer;
4022     char filename[256];
4023     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4024     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4025     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4026     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4027     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4028   }
4029 #endif
4030   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4031   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4032   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4033   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4034   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4035 
4036   /* compute dofs splitting and neumann boundaries for coarse dofs */
4037   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4038     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4039     const PetscInt         *idxs;
4040     ISLocalToGlobalMapping tmap;
4041 
4042     /* create map between primal indices (in local representative ordering) and local primal numbering */
4043     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4044     /* allocate space for temporary storage */
4045     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4046     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4047     /* allocate for IS array */
4048     nisdofs = pcbddc->n_ISForDofsLocal;
4049     nisneu = !!pcbddc->NeumannBoundariesLocal;
4050     nis = nisdofs + nisneu;
4051     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4052     /* dofs splitting */
4053     for (i=0;i<nisdofs;i++) {
4054       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4055       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4056       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4057       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4058       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4059       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4060       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4061       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4062     }
4063     /* neumann boundaries */
4064     if (pcbddc->NeumannBoundariesLocal) {
4065       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4066       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4067       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4068       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4069       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4070       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4071       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4072       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4073     }
4074     /* free memory */
4075     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4076     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4077     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4078   } else {
4079     nis = 0;
4080     nisdofs = 0;
4081     nisneu = 0;
4082     isarray = NULL;
4083   }
4084   /* destroy no longer needed map */
4085   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4086 
4087   /* restrict on coarse candidates (if needed) */
4088   coarse_mat_is = NULL;
4089   if (csin) {
4090     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4091       if (redist) {
4092         PetscMPIInt rank;
4093         PetscInt    spc,n_spc_p1,dest[1],destsize;
4094 
4095         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4096         spc = active_procs/ncoarse;
4097         n_spc_p1 = active_procs%ncoarse;
4098         if (im_active) {
4099           destsize = 1;
4100           if (rank > n_spc_p1*(spc+1)-1) {
4101             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4102           } else {
4103             dest[0] = rank/(spc+1);
4104           }
4105         } else {
4106           destsize = 0;
4107         }
4108         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4109       } else if (csin_type_simple) {
4110         PetscMPIInt rank;
4111         PetscInt    issize,isidx;
4112 
4113         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4114         if (im_active) {
4115           issize = 1;
4116           isidx = (PetscInt)rank;
4117         } else {
4118           issize = 0;
4119           isidx = -1;
4120         }
4121         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4122       } else { /* get a suitable subassembling pattern from MATIS code */
4123         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4124       }
4125 
4126       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4127       if (!redist || ncoarse <= void_procs) {
4128         PetscInt ncoarse_cand,tissize,*nisindices;
4129         PetscInt *coarse_candidates;
4130         const PetscInt* tisindices;
4131 
4132         /* get coarse candidates' ranks in pc communicator */
4133         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4134         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4135         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4136           if (!coarse_candidates[i]) {
4137             coarse_candidates[ncoarse_cand++]=i;
4138           }
4139         }
4140         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4141 
4142 
4143         if (pcbddc->dbg_flag) {
4144           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4145           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4146           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4147           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4148           for (i=0;i<ncoarse_cand;i++) {
4149             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4150           }
4151           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4152           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4153         }
4154         /* shift the pattern on coarse candidates */
4155         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4156         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4157         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4158         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4159         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4160         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4161         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4162       }
4163       if (pcbddc->dbg_flag) {
4164         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4165         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4166         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4167         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4168       }
4169     }
4170     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4171     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4172   } else {
4173     if (pcbddc->dbg_flag) {
4174       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4175       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4176       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4177     }
4178     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4179     coarse_mat_is = t_coarse_mat_is;
4180   }
4181 
4182   /* create local to global scatters for coarse problem */
4183   if (compute_vecs) {
4184     PetscInt lrows;
4185     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4186     if (coarse_mat_is) {
4187       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4188     } else {
4189       lrows = 0;
4190     }
4191     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4192     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4193     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4194     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4195     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4196   }
4197   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4198   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4199 
4200   /* set defaults for coarse KSP and PC */
4201   if (multilevel_allowed) {
4202     coarse_ksp_type = KSPRICHARDSON;
4203     coarse_pc_type = PCBDDC;
4204   } else {
4205     coarse_ksp_type = KSPPREONLY;
4206     coarse_pc_type = PCREDUNDANT;
4207   }
4208 
4209   /* print some info if requested */
4210   if (pcbddc->dbg_flag) {
4211     if (!multilevel_allowed) {
4212       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4213       if (multilevel_requested) {
4214         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4215       } else if (pcbddc->max_levels) {
4216         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4217       }
4218       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4219     }
4220   }
4221 
4222   /* create the coarse KSP object only once with defaults */
4223   if (coarse_mat_is) {
4224     MatReuse coarse_mat_reuse;
4225     PetscViewer dbg_viewer = NULL;
4226     if (pcbddc->dbg_flag) {
4227       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4228       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4229     }
4230     if (!pcbddc->coarse_ksp) {
4231       char prefix[256],str_level[16];
4232       size_t len;
4233       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4234       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4235       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4236       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4237       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4238       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4239       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4240       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4241       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4242       /* prefix */
4243       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4244       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4245       if (!pcbddc->current_level) {
4246         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4247         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4248       } else {
4249         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4250         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4251         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4252         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4253         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4254         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4255       }
4256       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4257       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4258       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4259       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4260       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4261       /* allow user customization */
4262       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4263     }
4264     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4265     if (nisdofs) {
4266       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4267       for (i=0;i<nisdofs;i++) {
4268         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4269       }
4270     }
4271     if (nisneu) {
4272       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4273       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4274     }
4275 
4276     /* get some info after set from options */
4277     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4278     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4279     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4280     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4281     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4282       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4283       isbddc = PETSC_FALSE;
4284     }
4285     if (isredundant) {
4286       KSP inner_ksp;
4287       PC inner_pc;
4288       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4289       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4290       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4291     }
4292 
4293     /* assemble coarse matrix */
4294     if (coarse_reuse) {
4295       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4296       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4297       coarse_mat_reuse = MAT_REUSE_MATRIX;
4298     } else {
4299       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4300     }
4301     if (isbddc || isnn) {
4302       if (pcbddc->coarsening_ratio > 1) {
4303         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4304           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4305           if (pcbddc->dbg_flag) {
4306             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4307             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4308             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4309             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4310           }
4311         }
4312         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4313       } else {
4314         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4315         coarse_mat = coarse_mat_is;
4316       }
4317     } else {
4318       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4319     }
4320     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4321 
4322     /* propagate symmetry info to coarse matrix */
4323     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4324     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4325 
4326     /* set operators */
4327     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4328     if (pcbddc->dbg_flag) {
4329       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4330     }
4331   } else { /* processes non partecipating to coarse solver (if any) */
4332     coarse_mat = 0;
4333   }
4334   ierr = PetscFree(isarray);CHKERRQ(ierr);
4335 #if 0
4336   {
4337     PetscViewer viewer;
4338     char filename[256];
4339     sprintf(filename,"coarse_mat.m");
4340     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4341     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4342     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4343     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4344   }
4345 #endif
4346 
4347   /* Compute coarse null space (special handling by BDDC only) */
4348   if (pcbddc->NullSpace) {
4349     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4350   }
4351 
4352   if (pcbddc->coarse_ksp) {
4353     Vec crhs,csol;
4354     PetscBool ispreonly;
4355     if (CoarseNullSpace) {
4356       if (isbddc) {
4357         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4358       } else {
4359         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4360       }
4361     }
4362     /* setup coarse ksp */
4363     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4364     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4365     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4366     /* hack */
4367     if (!csol) {
4368       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4369     }
4370     if (!crhs) {
4371       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4372     }
4373     /* Check coarse problem if in debug mode or if solving with an iterative method */
4374     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4375     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4376       KSP       check_ksp;
4377       KSPType   check_ksp_type;
4378       PC        check_pc;
4379       Vec       check_vec,coarse_vec;
4380       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4381       PetscInt  its;
4382       PetscBool compute_eigs;
4383       PetscReal *eigs_r,*eigs_c;
4384       PetscInt  neigs;
4385       const char *prefix;
4386 
4387       /* Create ksp object suitable for estimation of extreme eigenvalues */
4388       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4389       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4390       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4391       if (ispreonly) {
4392         check_ksp_type = KSPPREONLY;
4393         compute_eigs = PETSC_FALSE;
4394       } else {
4395         check_ksp_type = KSPGMRES;
4396         compute_eigs = PETSC_TRUE;
4397       }
4398       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4399       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4400       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4401       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4402       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4403       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4404       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4405       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4406       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4407       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4408       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4409       /* create random vec */
4410       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4411       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4412       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4413       if (CoarseNullSpace) {
4414         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4415       }
4416       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4417       /* solve coarse problem */
4418       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4419       if (CoarseNullSpace) {
4420         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4421       }
4422       /* set eigenvalue estimation if preonly has not been requested */
4423       if (compute_eigs) {
4424         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4425         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4426         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4427         lambda_max = eigs_r[neigs-1];
4428         lambda_min = eigs_r[0];
4429         if (pcbddc->use_coarse_estimates) {
4430           if (lambda_max>lambda_min) {
4431             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4432             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4433           }
4434         }
4435       }
4436 
4437       /* check coarse problem residual error */
4438       if (pcbddc->dbg_flag) {
4439         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4440         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4441         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4442         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4443         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4444         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4445         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4446         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4447         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4448         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4449         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4450         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4451         if (compute_eigs) {
4452           PetscReal lambda_max_s,lambda_min_s;
4453           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4454           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4455           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4456           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4457           for (i=0;i<neigs;i++) {
4458             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4459           }
4460         }
4461         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4462         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4463       }
4464       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4465       if (compute_eigs) {
4466         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4467         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4468       }
4469     }
4470   }
4471   /* print additional info */
4472   if (pcbddc->dbg_flag) {
4473     /* waits until all processes reaches this point */
4474     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4475     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4476     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4477   }
4478 
4479   /* free memory */
4480   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4481   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4482   PetscFunctionReturn(0);
4483 }
4484 
4485 #undef __FUNCT__
4486 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4487 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4488 {
4489   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4490   PC_IS*         pcis = (PC_IS*)pc->data;
4491   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4492   PetscInt       i,coarse_size=0;
4493   PetscInt       *local_primal_indices=NULL;
4494   PetscErrorCode ierr;
4495 
4496   PetscFunctionBegin;
4497   /* Compute global number of coarse dofs */
4498   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4499     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4500   }
4501   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4502 
4503   /* check numbering */
4504   if (pcbddc->dbg_flag) {
4505     PetscScalar coarsesum,*array;
4506     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4507 
4508     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4509     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4510     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4511     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4512     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4513     for (i=0;i<pcbddc->local_primal_size;i++) {
4514       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4515     }
4516     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4517     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4518     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4519     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4520     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4521     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4522     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4523     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4524     for (i=0;i<pcis->n;i++) {
4525       if (array[i] == 1.0) {
4526         set_error = PETSC_TRUE;
4527         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4528       }
4529     }
4530     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4531     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4532     for (i=0;i<pcis->n;i++) {
4533       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4534     }
4535     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4536     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4537     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4538     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4539     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4540     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4541     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4542       PetscInt *gidxs;
4543 
4544       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4545       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4546       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4547       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4548       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4549       for (i=0;i<pcbddc->local_primal_size;i++) {
4550         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
4551       }
4552       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4553       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4554     }
4555     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4556     if (set_error_reduced) {
4557       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4558     }
4559   }
4560   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4561   /* get back data */
4562   *coarse_size_n = coarse_size;
4563   *local_primal_indices_n = local_primal_indices;
4564   PetscFunctionReturn(0);
4565 }
4566 
4567 #undef __FUNCT__
4568 #define __FUNCT__ "PCBDDCGlobalToLocal"
4569 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4570 {
4571   IS             localis_t;
4572   PetscInt       i,lsize,*idxs,n;
4573   PetscScalar    *vals;
4574   PetscErrorCode ierr;
4575 
4576   PetscFunctionBegin;
4577   /* get indices in local ordering exploiting local to global map */
4578   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4579   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4580   for (i=0;i<lsize;i++) vals[i] = 1.0;
4581   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4582   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4583   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4584   if (idxs) { /* multilevel guard */
4585     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4586   }
4587   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4588   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4589   ierr = PetscFree(vals);CHKERRQ(ierr);
4590   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4591   /* now compute set in local ordering */
4592   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4593   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4594   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4595   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4596   for (i=0,lsize=0;i<n;i++) {
4597     if (PetscRealPart(vals[i]) > 0.5) {
4598       lsize++;
4599     }
4600   }
4601   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4602   for (i=0,lsize=0;i<n;i++) {
4603     if (PetscRealPart(vals[i]) > 0.5) {
4604       idxs[lsize++] = i;
4605     }
4606   }
4607   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4608   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4609   *localis = localis_t;
4610   PetscFunctionReturn(0);
4611 }
4612 
4613 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4614 #undef __FUNCT__
4615 #define __FUNCT__ "PCBDDCMatMult_Private"
4616 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4617 {
4618   PCBDDCChange_ctx change_ctx;
4619   PetscErrorCode   ierr;
4620 
4621   PetscFunctionBegin;
4622   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4623   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4624   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4625   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4626   PetscFunctionReturn(0);
4627 }
4628 
4629 #undef __FUNCT__
4630 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4631 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4632 {
4633   PCBDDCChange_ctx change_ctx;
4634   PetscErrorCode   ierr;
4635 
4636   PetscFunctionBegin;
4637   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4638   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4639   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4640   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4641   PetscFunctionReturn(0);
4642 }
4643 
4644 #undef __FUNCT__
4645 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4646 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4647 {
4648   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4649   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4650   PetscInt            *used_xadj,*used_adjncy;
4651   PetscBool           free_used_adj;
4652   PetscErrorCode      ierr;
4653 
4654   PetscFunctionBegin;
4655   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4656   free_used_adj = PETSC_FALSE;
4657   if (pcbddc->sub_schurs_layers == -1) {
4658     used_xadj = NULL;
4659     used_adjncy = NULL;
4660   } else {
4661     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4662       used_xadj = pcbddc->mat_graph->xadj;
4663       used_adjncy = pcbddc->mat_graph->adjncy;
4664     } else if (pcbddc->computed_rowadj) {
4665       used_xadj = pcbddc->mat_graph->xadj;
4666       used_adjncy = pcbddc->mat_graph->adjncy;
4667     } else {
4668       PetscBool      flg_row=PETSC_FALSE;
4669       const PetscInt *xadj,*adjncy;
4670       PetscInt       nvtxs;
4671 
4672       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4673       if (flg_row) {
4674         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4675         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4676         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4677         free_used_adj = PETSC_TRUE;
4678       } else {
4679         pcbddc->sub_schurs_layers = -1;
4680         used_xadj = NULL;
4681         used_adjncy = NULL;
4682       }
4683       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4684     }
4685   }
4686   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4687 
4688   /* free adjacency */
4689   if (free_used_adj) {
4690     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4691   }
4692   PetscFunctionReturn(0);
4693 }
4694 
4695 #undef __FUNCT__
4696 #define __FUNCT__ "PCBDDCInitSubSchurs"
4697 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4698 {
4699   PC_IS               *pcis=(PC_IS*)pc->data;
4700   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4701   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4702   PCBDDCGraph         graph;
4703   Mat                 S_j;
4704   PetscErrorCode      ierr;
4705 
4706   PetscFunctionBegin;
4707   /* attach interface graph for determining subsets */
4708   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4709     IS verticesIS;
4710 
4711     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4712     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4713     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4714     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4715     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4716     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4717 /*
4718     if (pcbddc->dbg_flag) {
4719       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4720     }
4721 */
4722   } else {
4723     graph = pcbddc->mat_graph;
4724   }
4725 
4726   /* Create Schur complement matrix */
4727   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4728   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4729 
4730   /* sub_schurs init */
4731   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4732   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4733   /* free graph struct */
4734   if (pcbddc->sub_schurs_rebuild) {
4735     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4736   }
4737   PetscFunctionReturn(0);
4738 }
4739