xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 4bc2dc4bd263f882f69e906548202714ae1fb6b6)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
60   nmax = PetscMax(nmin,nmax);
61 
62   /* allocate lapack workspace */
63   cum = cum2 = 0;
64   maxneigs = 0;
65   for (i=0;i<sub_schurs->n_subs;i++) {
66     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
67       PetscInt n,subset_size;
68       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
69       n = PetscMin(subset_size,nmax);
70       cum += subset_size*n;
71       cum2 += n;
72       maxneigs = PetscMax(maxneigs,n);
73     }
74   }
75 
76   if (mss) {
77     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
78       PetscBLASInt B_itype = 1;
79       PetscBLASInt B_N = mss;
80       PetscReal    zero = 0.0;
81       PetscReal    eps = 0.0; /* dlamch? */
82 
83       B_lwork = -1;
84       S = NULL;
85       St = NULL;
86       eigs = NULL;
87       eigv = NULL;
88       B_iwork = NULL;
89       B_ifail = NULL;
90       thresh = 1.0;
91       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
92 #if defined(PETSC_USE_COMPLEX)
93       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
94 #else
95       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
96 #endif
97       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
98       ierr = PetscFPTrapPop();CHKERRQ(ierr);
99     } else {
100         /* TODO */
101     }
102   } else {
103     lwork = 0;
104   }
105 
106   nv = 0;
107   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */
108     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
109   }
110   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
111   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
112                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
113 #if defined(PETSC_USE_COMPLEX)
114   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
115 #endif
116   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
117   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
118                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
119                       nv+cum,&pcbddc->adaptive_constraints_idxs,
120                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
121   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
122 
123   maxneigs = 0;
124   cum = cum2 = cumarray = 0;
125   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
126     const PetscInt *idxs;
127 
128     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
129     for (cum=0;cum<nv;cum++) {
130       pcbddc->adaptive_constraints_n[cum] = 1;
131       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
132       pcbddc->adaptive_constraints_ptrs[cum] = cum;
133       pcbddc->adaptive_constraints_data[cum] = 1.0;
134     }
135     cum2 = cum;
136     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
137   }
138 
139   if (mss) { /* multilevel */
140     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
141     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
142   }
143 
144   for (i=0;i<sub_schurs->n_subs;i++) {
145     PetscInt j,subset_size;
146 
147     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
148     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
149       const PetscInt *idxs;
150       PetscScalar    one = 1.0,scalar_zero = 0.0;
151       PetscReal      zero=0.0;
152       PetscBLASInt   B_N;
153 
154       /* S should be copied since we need it for deluxe scaling */
155       if (sub_schurs->is_hermitian) {
156         PetscInt j;
157         for (j=0;j<subset_size;j++) {
158           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
159         }
160         for (j=0;j<subset_size;j++) {
161           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
162         }
163       } else {
164         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
165         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
166       }
167       /* is always this the right matrix? */
168       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
169 
170       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
171       /* St = Starray+cumarray; */
172       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
173       if (pcbddc->adaptive_threshold > 1.0) { /* this is an heuristic for edges */
174         thresh = (1.*pcbddc->mat_graph->count[idxs[0]])/(pcbddc->adaptive_threshold);
175       } else {
176         thresh = 1.0;
177       }
178 
179       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
180         PetscBLASInt B_itype = 1;
181         PetscBLASInt B_IL = 1, B_IU;
182         PetscReal    eps = -1.0; /* dlamch? */
183         PetscInt     nmin_s;
184 
185         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
186         /* ask for eigenvalues lower than thresh */
187         if (pcbddc->dbg_flag) {
188           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
189         }
190         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
191 #if defined(PETSC_USE_COMPLEX)
192         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
193 #else
194         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
195 #endif
196         ierr = PetscFPTrapPop();CHKERRQ(ierr);
197         if (B_ierr) {
198           if (B_ierr < 0 ) {
199             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
200           } else if (B_ierr <= B_N) {
201             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
202           } else {
203             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
204           }
205         }
206 
207         if (B_neigs > nmax) {
208           if (pcbddc->dbg_flag) {
209             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
210           }
211           B_neigs = nmax;
212         }
213 
214         nmin_s = PetscMin(nmin,B_N);
215         if (B_neigs < nmin_s) {
216           PetscBLASInt B_neigs2;
217 
218           B_IL = B_neigs + 1;
219           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
220           if (pcbddc->dbg_flag) {
221             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
222           }
223           if (sub_schurs->is_hermitian) {
224             PetscInt j;
225             for (j=0;j<subset_size;j++) {
226               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
227             }
228             for (j=0;j<subset_size;j++) {
229               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
230             }
231           } else {
232             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
233             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
234           }
235           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
236 #if defined(PETSC_USE_COMPLEX)
237           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
238 #else
239           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
240 #endif
241           ierr = PetscFPTrapPop();CHKERRQ(ierr);
242           B_neigs += B_neigs2;
243         }
244         if (B_ierr) {
245           if (B_ierr < 0 ) {
246             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
247           } else if (B_ierr <= B_N) {
248             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
249           } else {
250             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
251           }
252         }
253         if (pcbddc->dbg_flag) {
254           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
255           for (j=0;j<B_neigs;j++) {
256             if (eigs[j] == 0.0) {
257               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
258             } else {
259               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1.0/eigs[j]);
260             }
261           }
262         }
263       } else {
264           /* TODO */
265       }
266 
267       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
268       if (B_neigs) {
269         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N));
270       }
271       maxneigs = PetscMax(B_neigs,maxneigs);
272       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
273       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
274 
275       if (pcbddc->dbg_flag > 1) {
276         PetscInt ii;
277         for (ii=0;ii<B_neigs;ii++) {
278           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
279           for (j=0;j<B_N;j++) {
280             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]);
281           }
282         }
283       }
284       for (j=0;j<B_neigs;j++) {
285 #if 0
286         {
287           PetscBLASInt Blas_N,Blas_one = 1.0;
288           PetscScalar norm;
289           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
290           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
291           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
292             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
293           } else {
294             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
295           }
296           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
297         }
298 #endif
299         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
300         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
301         cum2 += subset_size;
302       }
303       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
304     }
305     /* shift for next computation */
306     cumarray += subset_size*subset_size;
307   }
308   if (pcbddc->dbg_flag) {
309     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
310   }
311   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
312   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
313 
314   if (mss) {
315     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
316     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
317   }
318   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
319 #if defined(PETSC_USE_COMPLEX)
320   ierr = PetscFree(rwork);CHKERRQ(ierr);
321 #endif
322   if (pcbddc->dbg_flag) {
323     PetscInt maxneigs_r;
324     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
325     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
326   }
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "PCBDDCSetUpSolvers"
332 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
333 {
334   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
335   PetscScalar    *coarse_submat_vals;
336   PetscErrorCode ierr;
337 
338   PetscFunctionBegin;
339   /* Setup local scatters R_to_B and (optionally) R_to_D */
340   /* PCBDDCSetUpLocalWorkVectors should be called first! */
341   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
342 
343   /* Setup local neumann solver ksp_R */
344   /* PCBDDCSetUpLocalScatters should be called first! */
345   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
346 
347   /* Change global null space passed in by the user if change of basis has been requested */
348   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
349     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
350   }
351 
352   /*
353      Setup local correction and local part of coarse basis.
354      Gives back the dense local part of the coarse matrix in column major ordering
355   */
356   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
357 
358   /* Compute total number of coarse nodes and setup coarse solver */
359   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
360 
361   /* free */
362   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
363   PetscFunctionReturn(0);
364 }
365 
366 #undef __FUNCT__
367 #define __FUNCT__ "PCBDDCResetCustomization"
368 PetscErrorCode PCBDDCResetCustomization(PC pc)
369 {
370   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
371   PetscErrorCode ierr;
372 
373   PetscFunctionBegin;
374   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
375   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
376   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
377   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
378   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
379   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
380   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
381   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
383   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
384   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "PCBDDCResetTopography"
390 PetscErrorCode PCBDDCResetTopography(PC pc)
391 {
392   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
393   PetscErrorCode ierr;
394 
395   PetscFunctionBegin;
396   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
397   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
398   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
399   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
400   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "PCBDDCResetSolvers"
406 PetscErrorCode PCBDDCResetSolvers(PC pc)
407 {
408   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
409   PetscScalar    *array;
410   PetscErrorCode ierr;
411 
412   PetscFunctionBegin;
413   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
414   if (pcbddc->coarse_phi_B) {
415     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
416     ierr = PetscFree(array);CHKERRQ(ierr);
417   }
418   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
419   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
420   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
421   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
422   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
423   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
424   if (pcbddc->local_auxmat2) {
425     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
426     ierr = PetscFree(array);CHKERRQ(ierr);
427   }
428   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
429   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
430   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
431   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
432   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
433   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
434   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
435   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
436   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
437   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
438   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
439   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
440   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
441   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
442   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
443   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
444   PetscFunctionReturn(0);
445 }
446 
447 #undef __FUNCT__
448 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
449 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
450 {
451   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
452   PC_IS          *pcis = (PC_IS*)pc->data;
453   VecType        impVecType;
454   PetscInt       n_constraints,n_R,old_size;
455   PetscErrorCode ierr;
456 
457   PetscFunctionBegin;
458   if (!pcbddc->ConstraintMatrix) {
459     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
460   }
461   /* get sizes */
462   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
463   n_R = pcis->n-pcbddc->n_actual_vertices;
464   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
465   /* local work vectors (try to avoid unneeded work)*/
466   /* R nodes */
467   old_size = -1;
468   if (pcbddc->vec1_R) {
469     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
470   }
471   if (n_R != old_size) {
472     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
473     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
474     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
475     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
476     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
477     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
478   }
479   /* local primal dofs */
480   old_size = -1;
481   if (pcbddc->vec1_P) {
482     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
483   }
484   if (pcbddc->local_primal_size != old_size) {
485     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
486     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
487     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
488     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
489   }
490   /* local explicit constraints */
491   old_size = -1;
492   if (pcbddc->vec1_C) {
493     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
494   }
495   if (n_constraints && n_constraints != old_size) {
496     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
497     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
498     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
499     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
500   }
501   PetscFunctionReturn(0);
502 }
503 
504 #undef __FUNCT__
505 #define __FUNCT__ "PCBDDCSetUpCorrection"
506 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
507 {
508   PetscErrorCode         ierr;
509   /* pointers to pcis and pcbddc */
510   PC_IS*                 pcis = (PC_IS*)pc->data;
511   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
512   /* submatrices of local problem */
513   Mat                    A_RV,A_VR,A_VV;
514   /* submatrices of local coarse problem */
515   Mat                    S_VV,S_CV,S_VC,S_CC;
516   /* working matrices */
517   Mat                    C_CR;
518   /* additional working stuff */
519   PC                     pc_R;
520   Mat                    F;
521   PetscBool              isLU,isCHOL,isILU;
522 
523   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
524   PetscScalar            *work;
525   PetscInt               *idx_V_B;
526   PetscInt               n,n_vertices,n_constraints;
527   PetscInt               i,n_R,n_D,n_B;
528   PetscBool              unsymmetric_check;
529   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
530   MatType                impMatType;
531   /* some shortcuts to scalars */
532   PetscScalar            one=1.0,m_one=-1.0;
533 
534   PetscFunctionBegin;
535   /* get number of vertices (corners plus constraints with change of basis)
536      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
537   n_vertices = pcbddc->n_actual_vertices;
538   n_constraints = pcbddc->local_primal_size-n_vertices;
539   /* Set Non-overlapping dimensions */
540   n_B = pcis->n_B; n_D = pcis->n - n_B;
541   n_R = pcis->n-n_vertices;
542 
543   /* Set types for local objects needed by BDDC precondtioner */
544   impMatType = MATSEQDENSE;
545 
546   /* vertices in boundary numbering */
547   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
548   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
549   if (i != n_vertices) {
550     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
551   }
552 
553   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
554   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
555   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
556   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
557   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
558   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
559   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
560   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
563 
564   unsymmetric_check = PETSC_FALSE;
565   /* allocate workspace */
566   n = 0;
567   if (n_constraints) {
568     n += n_R*n_constraints;
569   }
570   if (n_vertices) {
571     n = PetscMax(2*n_R*n_vertices,n);
572   }
573   if (!pcbddc->issym) {
574     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
575     unsymmetric_check = PETSC_TRUE;
576   }
577   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
578 
579   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
580   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
581   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
582   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
583   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
584   if (isLU || isILU || isCHOL) {
585     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
586   } else {
587     F = NULL;
588   }
589 
590   /* Precompute stuffs needed for preprocessing and application of BDDC*/
591   if (n_constraints) {
592     Mat M1,M2,M3;
593     IS  is_aux;
594     /* see if we can save some allocations */
595     if (pcbddc->local_auxmat2) {
596       PetscInt on_R,on_constraints;
597       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
598       if (on_R != n_R || on_constraints != n_constraints) {
599         PetscScalar *marray;
600 
601         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
602         ierr = PetscFree(marray);CHKERRQ(ierr);
603         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
604         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
605       }
606     }
607     /* auxiliary matrices */
608     if (!pcbddc->local_auxmat2) {
609       PetscScalar *marray;
610 
611       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
612       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
613       marray += n_R*n_constraints;
614       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
615     }
616 
617     /* Extract constraints on R nodes: C_{CR}  */
618     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
619     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
620     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
621 
622     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
623     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
624     for (i=0;i<n_constraints;i++) {
625       const PetscScalar *row_cmat_values;
626       const PetscInt    *row_cmat_indices;
627       PetscInt          size_of_constraint,j;
628 
629       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
630       for (j=0;j<size_of_constraint;j++) {
631         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
632       }
633       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
634     }
635     if (F) {
636       Mat B;
637 
638       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
639       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
640       ierr = MatDestroy(&B);CHKERRQ(ierr);
641     } else {
642       PetscScalar *xarray;
643       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
644       for (i=0;i<n_constraints;i++) {
645         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
646         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
647         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
648         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
649         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
650       }
651       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
652     }
653 
654     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
655     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
656     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
657     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
658     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
659     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
660     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
661     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
662     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
663     ierr = MatDestroy(&M2);CHKERRQ(ierr);
664     ierr = MatDestroy(&M3);CHKERRQ(ierr);
665     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
666     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
667     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
668     ierr = MatDestroy(&M1);CHKERRQ(ierr);
669   }
670   /* Get submatrices from subdomain matrix */
671   if (n_vertices) {
672     Mat       newmat;
673     IS        is_aux;
674     PetscInt  ibs,mbs;
675     PetscBool issbaij;
676 
677     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
678     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
679     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
680     if (ibs != mbs) { /* need to convert to SEQAIJ */
681       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
682       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
683       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
684       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
685       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
686     } else {
687       /* this is safe */
688       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
689       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
690       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
691         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
692         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
693         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
694         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
695         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
696       } else {
697         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
698         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
699       }
700     }
701     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
702   }
703 
704   /* Matrix of coarse basis functions (local) */
705   if (pcbddc->coarse_phi_B) {
706     PetscInt on_B,on_primal,on_D=n_D;
707     if (pcbddc->coarse_phi_D) {
708       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
709     }
710     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
711     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
712       PetscScalar *marray;
713 
714       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
715       ierr = PetscFree(marray);CHKERRQ(ierr);
716       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
717       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
718       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
719       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
720     }
721   }
722 
723   if (!pcbddc->coarse_phi_B) {
724     PetscScalar *marray;
725 
726     n = n_B*pcbddc->local_primal_size;
727     if (pcbddc->switch_static || pcbddc->dbg_flag) {
728       n += n_D*pcbddc->local_primal_size;
729     }
730     if (!pcbddc->issym) {
731       n *= 2;
732     }
733     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
734     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
735     n = n_B*pcbddc->local_primal_size;
736     if (pcbddc->switch_static || pcbddc->dbg_flag) {
737       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
738       n += n_D*pcbddc->local_primal_size;
739     }
740     if (!pcbddc->issym) {
741       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
742       if (pcbddc->switch_static || pcbddc->dbg_flag) {
743         n = n_B*pcbddc->local_primal_size;
744         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
745       }
746     } else {
747       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
748       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
749       if (pcbddc->switch_static || pcbddc->dbg_flag) {
750         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
751         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
752       }
753     }
754   }
755   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
756   /* vertices */
757   if (n_vertices) {
758 
759     if (n_R) {
760       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
761       PetscBLASInt B_N,B_one = 1;
762       PetscScalar  *x,*y;
763 
764       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
765       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
766       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
767       if (F) {
768         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
769       } else {
770         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
771         for (i=0;i<n_vertices;i++) {
772           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
773           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
774           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
775           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
776           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
777         }
778         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
779       }
780       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
781       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
782       if (n_constraints) {
783         Mat B;
784         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
785         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
786         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
787         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
788         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
789         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
790         ierr = MatDestroy(&B);CHKERRQ(ierr);
791       }
792       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
793       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
794       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
795       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
796       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
797       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
798       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
799       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
800       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
801       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
802       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
803       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
804     } else {
805       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
806       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
807     }
808     /* coarse basis functions */
809     for (i=0;i<n_vertices;i++) {
810       PetscScalar *y;
811 
812       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
813       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
814       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
815       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
816       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
817       y[n_B*i+idx_V_B[i]] = 1.0;
818       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
819       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
820 
821       if (pcbddc->switch_static || pcbddc->dbg_flag) {
822         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
823         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
824         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
827         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
828       }
829       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
830     }
831     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
832     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
833   }
834 
835   if (n_constraints) {
836     Mat B;
837 
838     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
839     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
840     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
841     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
842     if (n_vertices) {
843       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
844     }
845     ierr = MatDestroy(&B);CHKERRQ(ierr);
846     /* coarse basis functions */
847     for (i=0;i<n_constraints;i++) {
848       PetscScalar *y;
849 
850       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
851       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
852       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
853       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
854       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
855       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
856       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
857 
858       if (pcbddc->switch_static || pcbddc->dbg_flag) {
859         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
860         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
861         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
864         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
865       }
866       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
867     }
868   }
869 
870   /* compute other basis functions for non-symmetric problems */
871   if (!pcbddc->issym) {
872     Mat B,X;
873 
874     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
875 
876     if (n_constraints) {
877       Mat S_CCT,B_C;
878 
879       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
880       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
881       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
882       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
883       if (n_vertices) {
884         Mat B_V,S_VCT;
885 
886         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
887         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
888         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
889         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
890         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
891       }
892       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
893     }
894     if (n_vertices && n_R) {
895       Mat          A_VRT;
896       PetscBLASInt B_N,B_one = 1;
897 
898       if (!n_constraints) { /* if there are no constraints, reset work */
899         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
900       }
901       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
902       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
903       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
904       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
905       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
906     }
907 
908     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
909     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
910       for (i=0;i<pcbddc->local_primal_size;i++) {
911         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
912         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
913         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
914         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
915         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
916       }
917     } else {
918       for (i=0;i<pcbddc->local_primal_size;i++) {
919         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
920         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
921         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
922         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
923         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
924       }
925     }
926     ierr = MatDestroy(&B);CHKERRQ(ierr);
927     /* coarse basis functions */
928     for (i=0;i<pcbddc->local_primal_size;i++) {
929       PetscScalar *y;
930 
931       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
932       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
933       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
934       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
935       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
936       if (i<n_vertices) {
937         y[n_B*i+idx_V_B[i]] = 1.0;
938       }
939       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
940       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
941 
942       if (pcbddc->switch_static || pcbddc->dbg_flag) {
943         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
944         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
945         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
948         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
949       }
950       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
951     }
952     ierr = MatDestroy(&X);CHKERRQ(ierr);
953   }
954 
955   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
956   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
957   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
958   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
959   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
960   /* Checking coarse_sub_mat and coarse basis functios */
961   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
962   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
963   if (pcbddc->dbg_flag) {
964     Mat         coarse_sub_mat;
965     Mat         AUXMAT,TM1,TM2,TM3,TM4;
966     Mat         coarse_phi_D,coarse_phi_B;
967     Mat         coarse_psi_D,coarse_psi_B;
968     Mat         A_II,A_BB,A_IB,A_BI;
969     Mat         C_B,CPHI;
970     IS          is_dummy;
971     Vec         mones;
972     MatType     checkmattype=MATSEQAIJ;
973     PetscReal   real_value;
974 
975     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
976     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
977     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
978     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
979     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
980     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
981     if (unsymmetric_check) {
982       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
983       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
984     }
985     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
986 
987     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
988     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
989     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
990     if (unsymmetric_check) {
991       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
992       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
993       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
994       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
995       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
996       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
997       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
998       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
999       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1000       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1001       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1002       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1003     } else {
1004       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1005       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1006       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1007       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1008       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1009       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1010       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1011       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1012     }
1013     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1014     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1015     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1016     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1017     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1018     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1019     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1020     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1021 
1022     /* check constraints */
1023     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1024     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1025     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1026     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1027     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1028     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1029     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1030     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1031     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1032     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1033     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1034     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1035 
1036     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1037     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1038     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1039     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1040     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1041     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1042     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1043     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1044     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1045     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1046     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1047     if (unsymmetric_check) {
1048       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1049       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1050     }
1051     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1052   }
1053 
1054   /* free memory */
1055   ierr = PetscFree(work);CHKERRQ(ierr);
1056   if (n_vertices) {
1057     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1058   }
1059   if (n_constraints) {
1060     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1061   }
1062   /* get back data */
1063   *coarse_submat_vals_n = coarse_submat_vals;
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1069 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1070 {
1071   Mat            *work_mat;
1072   IS             isrow_s,iscol_s;
1073   PetscBool      rsorted,csorted;
1074   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1075   PetscErrorCode ierr;
1076 
1077   PetscFunctionBegin;
1078   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1079   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1080   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1081   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1082 
1083   if (!rsorted) {
1084     const PetscInt *idxs;
1085     PetscInt *idxs_sorted,i;
1086 
1087     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1088     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1089     for (i=0;i<rsize;i++) {
1090       idxs_perm_r[i] = i;
1091     }
1092     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1093     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1094     for (i=0;i<rsize;i++) {
1095       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1096     }
1097     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1098     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1099   } else {
1100     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1101     isrow_s = isrow;
1102   }
1103 
1104   if (!csorted) {
1105     if (isrow == iscol) {
1106       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1107       iscol_s = isrow_s;
1108     } else {
1109       const PetscInt *idxs;
1110       PetscInt *idxs_sorted,i;
1111 
1112       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1113       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1114       for (i=0;i<csize;i++) {
1115         idxs_perm_c[i] = i;
1116       }
1117       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1118       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1119       for (i=0;i<csize;i++) {
1120         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1121       }
1122       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1123       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1124     }
1125   } else {
1126     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1127     iscol_s = iscol;
1128   }
1129 
1130   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1131 
1132   if (!rsorted || !csorted) {
1133     Mat      new_mat;
1134     IS       is_perm_r,is_perm_c;
1135 
1136     if (!rsorted) {
1137       PetscInt *idxs_r,i;
1138       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1139       for (i=0;i<rsize;i++) {
1140         idxs_r[idxs_perm_r[i]] = i;
1141       }
1142       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1143       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1144     } else {
1145       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1146     }
1147     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1148 
1149     if (!csorted) {
1150       if (isrow_s == iscol_s) {
1151         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1152         is_perm_c = is_perm_r;
1153       } else {
1154         PetscInt *idxs_c,i;
1155         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1156         for (i=0;i<csize;i++) {
1157           idxs_c[idxs_perm_c[i]] = i;
1158         }
1159         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1160         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1161       }
1162     } else {
1163       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1164     }
1165     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1166 
1167     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1168     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1169     work_mat[0] = new_mat;
1170     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1171     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1172   }
1173 
1174   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1175   *B = work_mat[0];
1176   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1177   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1178   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1184 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1185 {
1186   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1187   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1188   Mat            new_mat;
1189   IS             is_local,is_global;
1190   PetscInt       local_size;
1191   PetscBool      isseqaij;
1192   PetscErrorCode ierr;
1193 
1194   PetscFunctionBegin;
1195   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1196   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1197   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1198   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1199   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1200   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1201   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1202 
1203   /* check */
1204   if (pcbddc->dbg_flag) {
1205     Vec       x,x_change;
1206     PetscReal error;
1207 
1208     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1209     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1210     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1211     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1212     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1213     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1214     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1216     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1217     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1218     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1219     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1220     ierr = VecDestroy(&x);CHKERRQ(ierr);
1221     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1222   }
1223 
1224   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1225   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1226   if (isseqaij) {
1227     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1228   } else {
1229     Mat work_mat;
1230     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1231     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1232     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1233   }
1234   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1235 #if !defined(PETSC_USE_COMPLEX)
1236   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1237 #endif
1238   /*
1239   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1240   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1241   */
1242   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1248 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1249 {
1250   PC_IS*         pcis = (PC_IS*)(pc->data);
1251   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1252   IS             is_aux1,is_aux2;
1253   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1254   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1255   PetscInt       vbs,bs;
1256   PetscBT        bitmask;
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   /*
1261     No need to setup local scatters if
1262       - primal space is unchanged
1263         AND
1264       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1265         AND
1266       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1267   */
1268   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1269     PetscFunctionReturn(0);
1270   }
1271   /* destroy old objects */
1272   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1273   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1274   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1275   /* Set Non-overlapping dimensions */
1276   n_B = pcis->n_B; n_D = pcis->n - n_B;
1277   n_vertices = pcbddc->n_actual_vertices;
1278   /* create auxiliary bitmask */
1279   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1280   for (i=0;i<n_vertices;i++) {
1281     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1282   }
1283 
1284   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1285   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1286   for (i=0, n_R=0; i<pcis->n; i++) {
1287     if (!PetscBTLookup(bitmask,i)) {
1288       idx_R_local[n_R] = i;
1289       n_R++;
1290     }
1291   }
1292 
1293   /* Block code */
1294   vbs = 1;
1295   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1296   if (bs>1 && !(n_vertices%bs)) {
1297     PetscBool is_blocked = PETSC_TRUE;
1298     PetscInt  *vary;
1299     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1300     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1301     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1302     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1303     for (i=0; i<n_vertices/bs; i++) {
1304       if (vary[i]!=0 && vary[i]!=bs) {
1305         is_blocked = PETSC_FALSE;
1306         break;
1307       }
1308     }
1309     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1310       vbs = bs;
1311       for (i=0;i<n_R/vbs;i++) {
1312         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1313       }
1314     }
1315     ierr = PetscFree(vary);CHKERRQ(ierr);
1316   }
1317   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1318   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1319 
1320   /* print some info if requested */
1321   if (pcbddc->dbg_flag) {
1322     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1323     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1324     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1325     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1326     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1327     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1328     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1329     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1330   }
1331 
1332   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1333   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1334   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1335   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1336   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1337   for (i=0; i<n_D; i++) {
1338     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1339   }
1340   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1341   for (i=0, j=0; i<n_R; i++) {
1342     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1343       aux_array1[j++] = i;
1344     }
1345   }
1346   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1347   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1348   for (i=0, j=0; i<n_B; i++) {
1349     if (!PetscBTLookup(bitmask,is_indices[i])) {
1350       aux_array2[j++] = i;
1351     }
1352   }
1353   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1354   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1355   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1356   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1357   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1358 
1359   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1360     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1361     for (i=0, j=0; i<n_R; i++) {
1362       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1363         aux_array1[j++] = i;
1364       }
1365     }
1366     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1367     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1368     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1369   }
1370   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1371   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1378 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1379 {
1380   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1381   PC_IS          *pcis = (PC_IS*)pc->data;
1382   PC             pc_temp;
1383   Mat            A_RR;
1384   MatReuse       reuse;
1385   PetscScalar    m_one = -1.0;
1386   PetscReal      value;
1387   PetscInt       n_D,n_R,ibs,mbs;
1388   PetscBool      use_exact,use_exact_reduced,issbaij;
1389   PetscErrorCode ierr;
1390   /* prefixes stuff */
1391   char           dir_prefix[256],neu_prefix[256],str_level[16];
1392   size_t         len;
1393 
1394   PetscFunctionBegin;
1395 
1396   /* compute prefixes */
1397   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1398   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1399   if (!pcbddc->current_level) {
1400     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1401     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1402     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1403     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1406     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1407     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1408     len -= 15; /* remove "pc_bddc_coarse_" */
1409     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1410     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1411     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1412     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1413     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1414     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1415     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1416     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1417   }
1418 
1419   /* DIRICHLET PROBLEM */
1420   if (dirichlet) {
1421     if (pcbddc->issym) {
1422       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1423     }
1424     /* Matrix for Dirichlet problem is pcis->A_II */
1425     n_D = pcis->n - pcis->n_B;
1426     if (!pcbddc->ksp_D) { /* create object if not yet build */
1427       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1428       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1429       /* default */
1430       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1431       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1432       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1433       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1434       if (issbaij) {
1435         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1436       } else {
1437         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1438       }
1439       /* Allow user's customization */
1440       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1441       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1442     }
1443     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1444     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1445     if (!n_D) {
1446       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1447       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1448     }
1449     /* Set Up KSP for Dirichlet problem of BDDC */
1450     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1451     /* set ksp_D into pcis data */
1452     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1453     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1454     pcis->ksp_D = pcbddc->ksp_D;
1455   }
1456 
1457   /* NEUMANN PROBLEM */
1458   A_RR = 0;
1459   if (neumann) {
1460     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1461     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1462     if (pcbddc->ksp_R) { /* already created ksp */
1463       PetscInt nn_R;
1464       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1465       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1466       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1467       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1468         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1469         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1470         reuse = MAT_INITIAL_MATRIX;
1471       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1472         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1473           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1474           reuse = MAT_INITIAL_MATRIX;
1475         } else { /* safe to reuse the matrix */
1476           reuse = MAT_REUSE_MATRIX;
1477         }
1478       }
1479       /* last check */
1480       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1481         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1482         reuse = MAT_INITIAL_MATRIX;
1483       }
1484     } else { /* first time, so we need to create the matrix */
1485       reuse = MAT_INITIAL_MATRIX;
1486     }
1487     /* extract A_RR */
1488     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1489     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1490     if (ibs != mbs) {
1491       Mat newmat;
1492       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1493       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1494       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1495     } else {
1496       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1497     }
1498     if (pcbddc->issym) {
1499       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1500     }
1501     if (!pcbddc->ksp_R) { /* create object if not present */
1502       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1503       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1504       /* default */
1505       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1506       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1507       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1508       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1509       if (issbaij) {
1510         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1511       } else {
1512         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1513       }
1514       /* Allow user's customization */
1515       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1516       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1517     }
1518     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1519     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1520     if (!n_R) {
1521       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1522       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1523     }
1524     /* Set Up KSP for Neumann problem of BDDC */
1525     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1526   }
1527 
1528   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1529   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1530     if (pcbddc->dbg_flag) {
1531       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1532       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1533       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1534     }
1535     if (dirichlet) { /* Dirichlet */
1536       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1537       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1538       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1539       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1540       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1541       /* need to be adapted? */
1542       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1543       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1544       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1545       /* print info */
1546       if (pcbddc->dbg_flag) {
1547         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1548         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1549       }
1550       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1551         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1552       }
1553     }
1554     if (neumann) { /* Neumann */
1555       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1556       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1557       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1558       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1559       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1560       /* need to be adapted? */
1561       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1562       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1563       /* print info */
1564       if (pcbddc->dbg_flag) {
1565         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1566         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1567       }
1568       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1569         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1570       }
1571     }
1572   }
1573   /* free Neumann problem's matrix */
1574   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1580 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1581 {
1582   PetscErrorCode ierr;
1583   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1584 
1585   PetscFunctionBegin;
1586   if (applytranspose) {
1587     if (pcbddc->local_auxmat1) {
1588       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1589       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1590     }
1591     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1592   } else {
1593     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1594     if (pcbddc->local_auxmat1) {
1595       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1596       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1597     }
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1603 #undef __FUNCT__
1604 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1605 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1606 {
1607   PetscErrorCode ierr;
1608   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1609   PC_IS*            pcis = (PC_IS*)  (pc->data);
1610   const PetscScalar zero = 0.0;
1611 
1612   PetscFunctionBegin;
1613   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1614   if (applytranspose) {
1615     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1616     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1617   } else {
1618     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1619     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1620   }
1621   /* start communications from local primal nodes to rhs of coarse solver */
1622   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1623   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1624   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625 
1626   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1627   /* TODO remove null space when doing multilevel */
1628   if (pcbddc->coarse_ksp) {
1629     Vec rhs,sol;
1630 
1631     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1632     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1633     if (applytranspose) {
1634       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1635     } else {
1636       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1637     }
1638   }
1639 
1640   /* Local solution on R nodes */
1641   if (pcis->n) {
1642     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1643     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1644     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1645     if (pcbddc->switch_static) {
1646       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1647       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1648     }
1649     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1650     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1651     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1652     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1653     if (pcbddc->switch_static) {
1654       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1655       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1656     }
1657   }
1658 
1659   /* communications from coarse sol to local primal nodes */
1660   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1661   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1662 
1663   /* Sum contributions from two levels */
1664   if (applytranspose) {
1665     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1666     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1667   } else {
1668     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1669     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1670   }
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1676 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1677 {
1678   PetscErrorCode ierr;
1679   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1680   PetscScalar    *array;
1681   Vec            from,to;
1682 
1683   PetscFunctionBegin;
1684   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1685     from = pcbddc->coarse_vec;
1686     to = pcbddc->vec1_P;
1687     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1688       Vec tvec;
1689 
1690       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1691       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1692       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1693       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1694       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1695       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1696     }
1697   } else { /* from local to global -> put data in coarse right hand side */
1698     from = pcbddc->vec1_P;
1699     to = pcbddc->coarse_vec;
1700   }
1701   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1707 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1708 {
1709   PetscErrorCode ierr;
1710   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1711   PetscScalar    *array;
1712   Vec            from,to;
1713 
1714   PetscFunctionBegin;
1715   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1716     from = pcbddc->coarse_vec;
1717     to = pcbddc->vec1_P;
1718   } else { /* from local to global -> put data in coarse right hand side */
1719     from = pcbddc->vec1_P;
1720     to = pcbddc->coarse_vec;
1721   }
1722   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1723   if (smode == SCATTER_FORWARD) {
1724     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1725       Vec tvec;
1726 
1727       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1728       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1729       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1730       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1731     }
1732   } else {
1733     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1734      ierr = VecResetArray(from);CHKERRQ(ierr);
1735     }
1736   }
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 /* uncomment for testing purposes */
1741 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1742 #undef __FUNCT__
1743 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1744 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1745 {
1746   PetscErrorCode    ierr;
1747   PC_IS*            pcis = (PC_IS*)(pc->data);
1748   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1749   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1750   /* one and zero */
1751   PetscScalar       one=1.0,zero=0.0;
1752   /* space to store constraints and their local indices */
1753   PetscScalar       *temp_quadrature_constraint;
1754   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1755   /* iterators */
1756   PetscInt          i,j,k,total_counts,temp_start_ptr;
1757   /* BLAS integers */
1758   PetscBLASInt      lwork,lierr;
1759   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1760   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1761   /* reuse */
1762   PetscInt          olocal_primal_size;
1763   PetscInt          *oprimal_indices_local_idxs;
1764   /* change of basis */
1765   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1766   PetscBool         boolforchange,qr_needed;
1767   PetscBT           touched,change_basis,qr_needed_idx;
1768   /* auxiliary stuff */
1769   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1770   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1771   PetscScalar       *temp_quadrature_constraint_work=NULL;
1772   /* some quantities */
1773   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1774   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1775 
1776   PetscFunctionBegin;
1777   /* Destroy Mat objects computed previously */
1778   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1779   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1780 
1781   /* print some info */
1782   if (pcbddc->dbg_flag) {
1783     IS       vertices;
1784     PetscInt nv,nedges,nfaces;
1785     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1786     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1787     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1788     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1789     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1790     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1791     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1792     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1793     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1794   }
1795 
1796   if (!pcbddc->adaptive_selection) {
1797     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1798     MatNullSpace nearnullsp;
1799     const Vec    *nearnullvecs;
1800     Vec          *localnearnullsp;
1801     PetscScalar  *array;
1802     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1803     PetscBool    nnsp_has_cnst;
1804     /* LAPACK working arrays for SVD or POD */
1805     PetscBool    skip_lapack;
1806     PetscScalar  *work;
1807     PetscReal    *singular_vals;
1808 #if defined(PETSC_USE_COMPLEX)
1809     PetscReal    *rwork;
1810 #endif
1811 #if defined(PETSC_MISSING_LAPACK_GESVD)
1812     PetscScalar  *temp_basis,*correlation_mat;
1813 #else
1814     PetscBLASInt dummy_int=1;
1815     PetscScalar  dummy_scalar=1.;
1816 #endif
1817 
1818     /* Get index sets for faces, edges and vertices from graph */
1819     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1820     /* free unneeded index sets */
1821     if (!pcbddc->use_vertices) {
1822       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1823     }
1824     if (!pcbddc->use_edges) {
1825       for (i=0;i<n_ISForEdges;i++) {
1826         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1827       }
1828       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1829       n_ISForEdges = 0;
1830     }
1831     if (!pcbddc->use_faces) {
1832       for (i=0;i<n_ISForFaces;i++) {
1833         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1834       }
1835       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1836       n_ISForFaces = 0;
1837     }
1838     /* HACKS (the following two blocks of code) */
1839     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1840       pcbddc->use_change_of_basis = PETSC_TRUE;
1841       if (!ISForEdges) {
1842         pcbddc->use_change_on_faces = PETSC_TRUE;
1843       }
1844     }
1845     if (pcbddc->NullSpace) {
1846       /* use_change_of_basis should be consistent among processors */
1847       PetscBool tbool[2],gbool[2];
1848       tbool[0] = pcbddc->use_change_of_basis;
1849       tbool[1] = pcbddc->use_change_on_faces;
1850       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1851       pcbddc->use_change_of_basis = gbool[0];
1852       pcbddc->use_change_on_faces = gbool[1];
1853     }
1854 
1855     /* check if near null space is attached to global mat */
1856     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1857     if (nearnullsp) {
1858       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1859       /* remove any stored info */
1860       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1861       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1862       /* store information for BDDC solver reuse */
1863       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1864       pcbddc->onearnullspace = nearnullsp;
1865       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1866       for (i=0;i<nnsp_size;i++) {
1867         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1868       }
1869     } else { /* if near null space is not provided BDDC uses constants by default */
1870       nnsp_size = 0;
1871       nnsp_has_cnst = PETSC_TRUE;
1872     }
1873     /* get max number of constraints on a single cc */
1874     max_constraints = nnsp_size;
1875     if (nnsp_has_cnst) max_constraints++;
1876 
1877     /*
1878          Evaluate maximum storage size needed by the procedure
1879          - temp_indices will contain start index of each constraint stored as follows
1880          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1881          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1882          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1883                                                                                                                                                            */
1884     total_counts = n_ISForFaces+n_ISForEdges;
1885     total_counts *= max_constraints;
1886     n_vertices = 0;
1887     if (ISForVertices) {
1888       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1889     }
1890     total_counts += n_vertices;
1891     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1892     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1893     total_counts = 0;
1894     max_size_of_constraint = 0;
1895     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1896       if (i<n_ISForEdges) {
1897         used_IS = &ISForEdges[i];
1898       } else {
1899         used_IS = &ISForFaces[i-n_ISForEdges];
1900       }
1901       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1902       total_counts += j;
1903       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1904     }
1905     total_counts *= max_constraints;
1906     total_counts += n_vertices;
1907     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1908     /* get local part of global near null space vectors */
1909     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1910     for (k=0;k<nnsp_size;k++) {
1911       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1912       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1913       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1914     }
1915 
1916     /* whether or not to skip lapack calls */
1917     skip_lapack = PETSC_TRUE;
1918     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1919 
1920     /* allocate some auxiliary stuff */
1921     if (!skip_lapack || pcbddc->use_qr_single) {
1922       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1923     } else {
1924       gidxs = NULL;
1925       permutation = NULL;
1926       temp_indices_to_constraint_work = NULL;
1927       temp_quadrature_constraint_work = NULL;
1928     }
1929 
1930     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1931     if (!skip_lapack) {
1932       PetscScalar temp_work;
1933 
1934 #if defined(PETSC_MISSING_LAPACK_GESVD)
1935       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1936       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1937       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1938       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1939 #if defined(PETSC_USE_COMPLEX)
1940       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1941 #endif
1942       /* now we evaluate the optimal workspace using query with lwork=-1 */
1943       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1944       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1945       lwork = -1;
1946       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1947 #if !defined(PETSC_USE_COMPLEX)
1948       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1949 #else
1950       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1951 #endif
1952       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1953       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1954 #else /* on missing GESVD */
1955       /* SVD */
1956       PetscInt max_n,min_n;
1957       max_n = max_size_of_constraint;
1958       min_n = max_constraints;
1959       if (max_size_of_constraint < max_constraints) {
1960         min_n = max_size_of_constraint;
1961         max_n = max_constraints;
1962       }
1963       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1964 #if defined(PETSC_USE_COMPLEX)
1965       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1966 #endif
1967       /* now we evaluate the optimal workspace using query with lwork=-1 */
1968       lwork = -1;
1969       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1970       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1971       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1972       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1973 #if !defined(PETSC_USE_COMPLEX)
1974       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1975 #else
1976       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1977 #endif
1978       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1979       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1980 #endif /* on missing GESVD */
1981       /* Allocate optimal workspace */
1982       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1983       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1984     }
1985     /* Now we can loop on constraining sets */
1986     total_counts = 0;
1987     temp_indices[0] = 0;
1988     /* vertices */
1989     if (ISForVertices) {
1990       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1991       if (nnsp_has_cnst) { /* consider all vertices */
1992         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1993         for (i=0;i<n_vertices;i++) {
1994           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1995           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1996           total_counts++;
1997         }
1998       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1999         PetscBool used_vertex;
2000         for (i=0;i<n_vertices;i++) {
2001           used_vertex = PETSC_FALSE;
2002           k = 0;
2003           while (!used_vertex && k<nnsp_size) {
2004             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2005             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2006               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
2007               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2008               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2009               total_counts++;
2010               used_vertex = PETSC_TRUE;
2011             }
2012             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2013             k++;
2014           }
2015         }
2016       }
2017       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2018       n_vertices = total_counts;
2019     }
2020 
2021     /* edges and faces */
2022     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2023       if (ncc<n_ISForEdges) {
2024         used_IS = &ISForEdges[ncc];
2025         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2026       } else {
2027         used_IS = &ISForFaces[ncc-n_ISForEdges];
2028         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2029       }
2030       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2031       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2032       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2033       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2034       /* change of basis should not be performed on local periodic nodes */
2035       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2036       if (nnsp_has_cnst) {
2037         PetscScalar quad_value;
2038         temp_constraints++;
2039         if (!pcbddc->use_nnsp_true) {
2040           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2041         } else {
2042           quad_value = 1.0;
2043         }
2044         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2045         for (j=0;j<size_of_constraint;j++) {
2046           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2047         }
2048         /* sort by global ordering if using lapack subroutines (not needed!) */
2049         if (!skip_lapack || pcbddc->use_qr_single) {
2050           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2051           for (j=0;j<size_of_constraint;j++) {
2052             permutation[j]=j;
2053           }
2054           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2055           for (j=0;j<size_of_constraint;j++) {
2056             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2057           }
2058           for (j=0;j<size_of_constraint;j++) {
2059             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2060             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2061           }
2062           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2063           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2064         }
2065         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2066         total_counts++;
2067       }
2068       for (k=0;k<nnsp_size;k++) {
2069         PetscReal real_value;
2070         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2071         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2072         for (j=0;j<size_of_constraint;j++) {
2073           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2074         }
2075         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2076         /* check if array is null on the connected component */
2077         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2078         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2079         if (real_value > 0.0) { /* keep indices and values */
2080           /* sort by global ordering if using lapack subroutines */
2081           if (!skip_lapack || pcbddc->use_qr_single) {
2082             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2083             for (j=0;j<size_of_constraint;j++) {
2084               permutation[j]=j;
2085             }
2086             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2087             for (j=0;j<size_of_constraint;j++) {
2088               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2089               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2090             }
2091             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2092             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2093           }
2094           temp_constraints++;
2095           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2096           total_counts++;
2097         }
2098       }
2099       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2100       valid_constraints = temp_constraints;
2101       if (!pcbddc->use_nnsp_true && temp_constraints) {
2102         if (temp_constraints == 1) { /* just normalize the constraint */
2103           PetscScalar norm;
2104           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2105           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2106           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2107           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2108         } else { /* perform SVD */
2109           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2110 
2111 #if defined(PETSC_MISSING_LAPACK_GESVD)
2112           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2113              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2114              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2115                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2116                 from that computed using LAPACKgesvd
2117              -> This is due to a different computation of eigenvectors in LAPACKheev
2118              -> The quality of the POD-computed basis will be the same */
2119           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2120           /* Store upper triangular part of correlation matrix */
2121           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2122           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2123           for (j=0;j<temp_constraints;j++) {
2124             for (k=0;k<j+1;k++) {
2125               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2126             }
2127           }
2128           /* compute eigenvalues and eigenvectors of correlation matrix */
2129           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2130           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2131 #if !defined(PETSC_USE_COMPLEX)
2132           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2133 #else
2134           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2135 #endif
2136           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2137           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2138           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2139           j = 0;
2140           while (j < temp_constraints && singular_vals[j] < tol) j++;
2141           total_counts = total_counts-j;
2142           valid_constraints = temp_constraints-j;
2143           /* scale and copy POD basis into used quadrature memory */
2144           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2145           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2146           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2147           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2148           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2149           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2150           if (j<temp_constraints) {
2151             PetscInt ii;
2152             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2153             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2154             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2155             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2156             for (k=0;k<temp_constraints-j;k++) {
2157               for (ii=0;ii<size_of_constraint;ii++) {
2158                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2159               }
2160             }
2161           }
2162 #else  /* on missing GESVD */
2163           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2164           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2165           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2166           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2167 #if !defined(PETSC_USE_COMPLEX)
2168           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2169 #else
2170           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2171 #endif
2172           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2173           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2174           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2175           k = temp_constraints;
2176           if (k > size_of_constraint) k = size_of_constraint;
2177           j = 0;
2178           while (j < k && singular_vals[k-j-1] < tol) j++;
2179           valid_constraints = k-j;
2180           total_counts = total_counts-temp_constraints+valid_constraints;
2181 #endif /* on missing GESVD */
2182         }
2183       }
2184       /* setting change_of_basis flag is safe now */
2185       if (boolforchange) {
2186         for (j=0;j<valid_constraints;j++) {
2187           PetscBTSet(change_basis,total_counts-j-1);
2188         }
2189       }
2190     }
2191     /* free workspace */
2192     if (!skip_lapack || pcbddc->use_qr_single) {
2193       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2194     }
2195     if (!skip_lapack) {
2196       ierr = PetscFree(work);CHKERRQ(ierr);
2197 #if defined(PETSC_USE_COMPLEX)
2198       ierr = PetscFree(rwork);CHKERRQ(ierr);
2199 #endif
2200       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2201 #if defined(PETSC_MISSING_LAPACK_GESVD)
2202       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2203       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2204 #endif
2205     }
2206     for (k=0;k<nnsp_size;k++) {
2207       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2208     }
2209     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2210     /* free index sets of faces, edges and vertices */
2211     for (i=0;i<n_ISForFaces;i++) {
2212       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2213     }
2214     if (n_ISForFaces) {
2215       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2216     }
2217     for (i=0;i<n_ISForEdges;i++) {
2218       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2219     }
2220     if (n_ISForEdges) {
2221       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2222     }
2223     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2224   } else {
2225     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2226     PetscInt        cum = 0;
2227 
2228     total_counts = 0;
2229     n_vertices = 0;
2230     if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
2231       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2232     }
2233     max_constraints = 0;
2234     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2235       total_counts += pcbddc->adaptive_constraints_n[i];
2236       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2237     }
2238     temp_indices = pcbddc->adaptive_constraints_ptrs;
2239     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2240     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2241 
2242 #if 0
2243     printf("Found %d totals\n",total_counts);
2244     for (i=0;i<total_counts;i++) {
2245       printf("const %d, start %d",i,temp_indices[i]);
2246       printf(" end %d:\n",temp_indices[i+1]);
2247       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2248         printf("  idxs %d",temp_indices_to_constraint[j]);
2249         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2250       }
2251     }
2252     for (i=0;i<n_vertices;i++) {
2253       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2254     }
2255     for (i=0;i<sub_schurs->n_subs;i++) {
2256       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2257     }
2258 #endif
2259 
2260     max_size_of_constraint = 0;
2261     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2262     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2263     /* Change of basis */
2264     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2265     if (pcbddc->use_change_of_basis) {
2266       cum = n_vertices;
2267       for (i=0;i<sub_schurs->n_subs;i++) {
2268         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2269           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2270             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2271           }
2272         }
2273         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2274       }
2275     }
2276   }
2277 
2278   /* map temp_indices_to_constraint in boundary numbering */
2279   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2280   if (i != temp_indices[total_counts]) {
2281     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2282   }
2283 
2284   /* set quantities in pcbddc data structure and store previous primal size */
2285   /* n_vertices defines the number of subdomain corners in the primal space */
2286   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2287   olocal_primal_size = pcbddc->local_primal_size;
2288   pcbddc->local_primal_size = total_counts;
2289   pcbddc->n_vertices = n_vertices;
2290   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2291 
2292   /* Create constraint matrix */
2293   /* The constraint matrix is used to compute the l2g map of primal dofs */
2294   /* so we need to set it up properly either with or without change of basis */
2295   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2296   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2297   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2298   /* array to compute a local numbering of constraints : vertices first then constraints */
2299   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2300   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2301   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2302   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2303   /* auxiliary stuff for basis change */
2304   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2305   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2306 
2307   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2308   total_primal_vertices=0;
2309   for (i=0;i<pcbddc->local_primal_size;i++) {
2310     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2311     if (size_of_constraint == 1) {
2312       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2313       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2314       aux_primal_minloc[total_primal_vertices]=0;
2315       total_primal_vertices++;
2316     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2317       PetscInt min_loc,min_index;
2318       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2319       /* find first untouched local node */
2320       k = 0;
2321       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2322       min_index = global_indices[k];
2323       min_loc = k;
2324       /* search the minimum among global nodes already untouched on the cc */
2325       for (k=1;k<size_of_constraint;k++) {
2326         /* there can be more than one constraint on a single connected component */
2327         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2328           min_index = global_indices[k];
2329           min_loc = k;
2330         }
2331       }
2332       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2333       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2334       aux_primal_minloc[total_primal_vertices]=min_loc;
2335       total_primal_vertices++;
2336     }
2337   }
2338   /* determine if a QR strategy is needed for change of basis */
2339   qr_needed = PETSC_FALSE;
2340   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2341   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2342     if (PetscBTLookup(change_basis,i)) {
2343       if (!pcbddc->use_qr_single && !pcbddc->faster_deluxe) {
2344         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2345         j = 0;
2346         for (k=0;k<size_of_constraint;k++) {
2347           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2348             j++;
2349           }
2350         }
2351         /* found more than one primal dof on the cc */
2352         if (j > 1) {
2353           PetscBTSet(qr_needed_idx,i);
2354           qr_needed = PETSC_TRUE;
2355         }
2356       } else {
2357         PetscBTSet(qr_needed_idx,i);
2358         qr_needed = PETSC_TRUE;
2359       }
2360     }
2361   }
2362   /* free workspace */
2363   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2364 
2365   /* permute indices in order to have a sorted set of vertices */
2366   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2367 
2368   /* nonzero structure of constraint matrix */
2369   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2370   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2371   j=total_primal_vertices;
2372   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2373     if (!PetscBTLookup(change_basis,i)) {
2374       nnz[j]=temp_indices[i+1]-temp_indices[i];
2375       j++;
2376     }
2377   }
2378   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2379   ierr = PetscFree(nnz);CHKERRQ(ierr);
2380   /* set values in constraint matrix */
2381   for (i=0;i<total_primal_vertices;i++) {
2382     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2383   }
2384   total_counts = total_primal_vertices;
2385   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2386     if (!PetscBTLookup(change_basis,i)) {
2387       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2388       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2389       total_counts++;
2390     }
2391   }
2392   /* assembling */
2393   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2394   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2395   /*
2396   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2397   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2398   */
2399   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2400   if (pcbddc->use_change_of_basis) {
2401     /* dual and primal dofs on a single cc */
2402     PetscInt     dual_dofs,primal_dofs;
2403     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2404     PetscInt     primal_counter;
2405     /* working stuff for GEQRF */
2406     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2407     PetscBLASInt lqr_work;
2408     /* working stuff for UNGQR */
2409     PetscScalar  *gqr_work,lgqr_work_t;
2410     PetscBLASInt lgqr_work;
2411     /* working stuff for TRTRS */
2412     PetscScalar  *trs_rhs;
2413     PetscBLASInt Blas_NRHS;
2414     /* pointers for values insertion into change of basis matrix */
2415     PetscInt     *start_rows,*start_cols;
2416     PetscScalar  *start_vals;
2417     /* working stuff for values insertion */
2418     PetscBT      is_primal;
2419     /* matrix sizes */
2420     PetscInt     global_size,local_size;
2421     /* temporary change of basis */
2422     Mat          localChangeOfBasisMatrix;
2423     /* extra space for debugging */
2424     PetscScalar  *dbg_work;
2425 
2426     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2427     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2428     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2429     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2430     /* nonzeros for local mat */
2431     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2432     for (i=0;i<pcis->n;i++) nnz[i]=1;
2433     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2434       if (PetscBTLookup(change_basis,i)) {
2435         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2436         if (PetscBTLookup(qr_needed_idx,i)) {
2437           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2438         } else {
2439           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2440           /* get local primal index on the cc */
2441           j = 0;
2442           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2443           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2444         }
2445       }
2446     }
2447     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2448     ierr = PetscFree(nnz);CHKERRQ(ierr);
2449     /* Set initial identity in the matrix */
2450     for (i=0;i<pcis->n;i++) {
2451       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2452     }
2453 
2454     if (pcbddc->dbg_flag) {
2455       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2456       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2457     }
2458 
2459 
2460     /* Now we loop on the constraints which need a change of basis */
2461     /*
2462        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2463        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2464 
2465        Basic blocks of change of basis matrix T computed by
2466 
2467           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2468 
2469             | 1        0   ...        0         s_1/S |
2470             | 0        1   ...        0         s_2/S |
2471             |              ...                        |
2472             | 0        ...            1     s_{n-1}/S |
2473             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2474 
2475             with S = \sum_{i=1}^n s_i^2
2476             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2477                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2478 
2479           - QR decomposition of constraints otherwise
2480     */
2481     if (qr_needed) {
2482       /* space to store Q */
2483       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2484       /* first we issue queries for optimal work */
2485       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2486       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2487       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2488       lqr_work = -1;
2489       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2490       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2491       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2492       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2493       lgqr_work = -1;
2494       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2495       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2496       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2497       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2498       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2499       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2500       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2501       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2502       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2503       /* array to store scaling factors for reflectors */
2504       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2505       /* array to store rhs and solution of triangular solver */
2506       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2507       /* allocating workspace for check */
2508       if (pcbddc->dbg_flag) {
2509         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2510       }
2511     }
2512     /* array to store whether a node is primal or not */
2513     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2514     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2515     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2516     if (i != total_primal_vertices) {
2517       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2518     }
2519     for (i=0;i<total_primal_vertices;i++) {
2520       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2521     }
2522     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2523 
2524     /* loop on constraints and see whether or not they need a change of basis and compute it */
2525     /* -> using implicit ordering contained in temp_indices data */
2526     total_counts = pcbddc->n_vertices;
2527     primal_counter = total_counts;
2528     while (total_counts<pcbddc->local_primal_size) {
2529       primal_dofs = 1;
2530       if (PetscBTLookup(change_basis,total_counts)) {
2531         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2532         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2533           primal_dofs++;
2534         }
2535         /* get constraint info */
2536         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2537         dual_dofs = size_of_constraint-primal_dofs;
2538 
2539         if (pcbddc->dbg_flag) {
2540           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2541         }
2542 
2543         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2544 
2545           /* copy quadrature constraints for change of basis check */
2546           if (pcbddc->dbg_flag) {
2547             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2548           }
2549           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2550           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2551 
2552           /* compute QR decomposition of constraints */
2553           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2554           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2555           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2556           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2557           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2558           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2559           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2560 
2561           /* explictly compute R^-T */
2562           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2563           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2564           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2565           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2566           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2567           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2568           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2569           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2570           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2571           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2572 
2573           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2574           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2575           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2576           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2577           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2578           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2579           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2580           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2581           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2582 
2583           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2584              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2585              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2586           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2587           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2588           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2589           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2590           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2591           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2592           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2593           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2594           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2595           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2596 
2597           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2598           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2599           /* insert cols for primal dofs */
2600           for (j=0;j<primal_dofs;j++) {
2601             start_vals = &qr_basis[j*size_of_constraint];
2602             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2603             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2604           }
2605           /* insert cols for dual dofs */
2606           for (j=0,k=0;j<dual_dofs;k++) {
2607             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2608               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2609               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2610               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2611               j++;
2612             }
2613           }
2614 
2615           /* check change of basis */
2616           if (pcbddc->dbg_flag) {
2617             PetscInt   ii,jj;
2618             PetscBool valid_qr=PETSC_TRUE;
2619             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2620             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2621             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2622             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2623             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2624             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2625             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2626             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2627             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2628             for (jj=0;jj<size_of_constraint;jj++) {
2629               for (ii=0;ii<primal_dofs;ii++) {
2630                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2631                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2632               }
2633             }
2634             if (!valid_qr) {
2635               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2636               for (jj=0;jj<size_of_constraint;jj++) {
2637                 for (ii=0;ii<primal_dofs;ii++) {
2638                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2639                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2640                   }
2641                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2642                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2643                   }
2644                 }
2645               }
2646             } else {
2647               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2648             }
2649           }
2650         } else { /* simple transformation block */
2651           PetscInt    row,col;
2652           PetscScalar val,norm;
2653 
2654           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2655           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2656           for (j=0;j<size_of_constraint;j++) {
2657             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2658             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2659             if (!PetscBTLookup(is_primal,row_B)) {
2660               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2661               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2662               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2663             } else {
2664               for (k=0;k<size_of_constraint;k++) {
2665                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2666                 if (row != col) {
2667                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2668                 } else {
2669                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2670                 }
2671                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2672               }
2673             }
2674           }
2675           if (pcbddc->dbg_flag) {
2676             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2677           }
2678         }
2679         /* increment primal counter */
2680         primal_counter += primal_dofs;
2681       } else {
2682         if (pcbddc->dbg_flag) {
2683           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2684         }
2685       }
2686       /* increment constraint counter total_counts */
2687       total_counts += primal_dofs;
2688     }
2689 
2690     /* free workspace */
2691     if (qr_needed) {
2692       if (pcbddc->dbg_flag) {
2693         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2694       }
2695       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2696       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2697       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2698       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2699       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2700     }
2701     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2702     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2703     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2704 
2705     /* assembling of global change of variable */
2706     {
2707       Mat      tmat;
2708       PetscInt bs;
2709 
2710       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2711       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2712       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2713       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2714       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2715       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2716       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2717       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2718       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2719       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2720       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2721       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2722       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2723       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2724       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2725       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2726       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2727       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2728     }
2729     /* check */
2730     if (pcbddc->dbg_flag) {
2731       PetscReal error;
2732       Vec       x,x_change;
2733 
2734       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2735       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2736       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2737       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2738       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2739       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2740       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2741       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2742       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2743       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2744       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2745       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2746       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2747       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2748       ierr = VecDestroy(&x);CHKERRQ(ierr);
2749       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2750     }
2751 
2752     /* adapt sub_schurs computed (if any) */
2753     if (pcbddc->use_deluxe_scaling) {
2754       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2755       if (sub_schurs->S_Ej_all) {
2756         Mat S_new,tmat;
2757         IS is_all_N;
2758 
2759         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2760         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2761         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2762         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2763         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2764         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2765         sub_schurs->S_Ej_all = S_new;
2766         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2767         if (sub_schurs->sum_S_Ej_all) {
2768           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2769           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2770           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2771           sub_schurs->sum_S_Ej_all = S_new;
2772           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2773         }
2774         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2775       }
2776     }
2777     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2778   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2779     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2780     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2781   }
2782 
2783   /* set up change of basis context */
2784   if (pcbddc->ChangeOfBasisMatrix) {
2785     PCBDDCChange_ctx change_ctx;
2786 
2787     if (!pcbddc->new_global_mat) {
2788       PetscInt global_size,local_size;
2789 
2790       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2791       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2792       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2793       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2794       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2795       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2796       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2797       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2798       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2799     } else {
2800       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2801       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2802       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2803     }
2804     if (!pcbddc->user_ChangeOfBasisMatrix) {
2805       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2806       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2807     } else {
2808       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2809       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2810     }
2811     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2812     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2813     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2814     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2815   }
2816 
2817   /* get indices in local ordering for vertices and constraints */
2818   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2819     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2820     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2821   }
2822   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2823   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2824   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2825   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2826   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2827   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2828   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2829   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2830   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2831   /* set quantities in PCBDDC data struct */
2832   pcbddc->n_actual_vertices = i;
2833   /* check if a new primal space has been introduced */
2834   pcbddc->new_primal_space_local = PETSC_TRUE;
2835   if (olocal_primal_size == pcbddc->local_primal_size) {
2836     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2837     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2838     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2839   }
2840   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2841   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2842 
2843   /* flush dbg viewer */
2844   if (pcbddc->dbg_flag) {
2845     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2846   }
2847 
2848   /* free workspace */
2849   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2850   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2851   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2852   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2853   if (!pcbddc->adaptive_selection) {
2854     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2855     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2856   } else {
2857     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2858                       pcbddc->adaptive_constraints_ptrs,
2859                       pcbddc->adaptive_constraints_idxs,
2860                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2861     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2862   }
2863   PetscFunctionReturn(0);
2864 }
2865 
2866 #undef __FUNCT__
2867 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2868 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2869 {
2870   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2871   PC_IS       *pcis = (PC_IS*)pc->data;
2872   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2873   PetscInt    ierr,i,vertex_size;
2874   PetscViewer viewer=pcbddc->dbg_viewer;
2875 
2876   PetscFunctionBegin;
2877   /* Reset previously computed graph */
2878   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2879   /* Init local Graph struct */
2880   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2881 
2882   /* Check validity of the csr graph passed in by the user */
2883   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2884     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2885   }
2886 
2887   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2888   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2889     PetscInt  *xadj,*adjncy;
2890     PetscInt  nvtxs;
2891     PetscBool flg_row=PETSC_FALSE;
2892 
2893     if (pcbddc->use_local_adj) {
2894 
2895       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2896       if (flg_row) {
2897         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2898         pcbddc->computed_rowadj = PETSC_TRUE;
2899       }
2900       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2901     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2902       IS                     is_dummy;
2903       ISLocalToGlobalMapping l2gmap_dummy;
2904       PetscInt               j,sum;
2905       PetscInt               *cxadj,*cadjncy;
2906       const PetscInt         *idxs;
2907       PCBDDCGraph            graph;
2908       PetscBT                is_on_boundary;
2909 
2910       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2911       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2912       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2913       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2914       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2915       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2916       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2917       if (flg_row) {
2918         graph->xadj = xadj;
2919         graph->adjncy = adjncy;
2920       }
2921       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2922       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2923       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2924 
2925       if (pcbddc->dbg_flag) {
2926         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2927         for (i=0;i<graph->ncc;i++) {
2928           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2929         }
2930         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2931       }
2932 
2933       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2934       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2935       for (i=0;i<pcis->n_B;i++) {
2936         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2937       }
2938       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2939 
2940       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2941       sum = 0;
2942       for (i=0;i<graph->ncc;i++) {
2943         PetscInt sizecc = 0;
2944         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2945           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2946             sizecc++;
2947           }
2948         }
2949         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2950           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2951             cxadj[graph->queue[j]] = sizecc;
2952           }
2953         }
2954         sum += sizecc*sizecc;
2955       }
2956       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2957       sum = 0;
2958       for (i=0;i<pcis->n;i++) {
2959         PetscInt temp = cxadj[i];
2960         cxadj[i] = sum;
2961         sum += temp;
2962       }
2963       cxadj[pcis->n] = sum;
2964       for (i=0;i<graph->ncc;i++) {
2965         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2966           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2967             PetscInt k,sizecc = 0;
2968             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2969               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2970                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2971                 sizecc++;
2972               }
2973             }
2974           }
2975         }
2976       }
2977       if (pcis->n) {
2978         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2979       } else {
2980         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2981         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2982       }
2983       graph->xadj = 0;
2984       graph->adjncy = 0;
2985       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2986       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2987     }
2988   }
2989 
2990   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2991   vertex_size = 1;
2992   if (pcbddc->user_provided_isfordofs) {
2993     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2994       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2995       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2996         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2997         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2998       }
2999       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3000       pcbddc->n_ISForDofs = 0;
3001       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3002     }
3003     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3004     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3005   } else {
3006     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3007       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3008       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3009       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3010         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3011       }
3012     }
3013   }
3014 
3015   /* Setup of Graph */
3016   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3017     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3018   }
3019   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3020     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3021   }
3022   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3023 
3024   /* Graph's connected components analysis */
3025   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3026 
3027   /* print some info to stdout */
3028   if (pcbddc->dbg_flag) {
3029     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3030   }
3031 
3032   /* mark topography has done */
3033   pcbddc->recompute_topography = PETSC_FALSE;
3034   PetscFunctionReturn(0);
3035 }
3036 
3037 #undef __FUNCT__
3038 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
3039 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
3040 {
3041   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3042   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
3043   PetscErrorCode ierr;
3044 
3045   PetscFunctionBegin;
3046   n = 0;
3047   vertices = 0;
3048   if (pcbddc->ConstraintMatrix) {
3049     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
3050     for (i=0;i<local_primal_size;i++) {
3051       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3052       if (size_of_constraint == 1) n++;
3053       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3054     }
3055     if (vertices_idx) {
3056       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3057       n = 0;
3058       for (i=0;i<local_primal_size;i++) {
3059         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3060         if (size_of_constraint == 1) {
3061           vertices[n++]=row_cmat_indices[0];
3062         }
3063         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3064       }
3065     }
3066   }
3067   *n_vertices = n;
3068   if (vertices_idx) *vertices_idx = vertices;
3069   PetscFunctionReturn(0);
3070 }
3071 
3072 #undef __FUNCT__
3073 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3074 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3075 {
3076   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3077   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3078   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3079   PetscBT        touched;
3080   PetscErrorCode ierr;
3081 
3082     /* This function assumes that the number of local constraints per connected component
3083        is not greater than the number of nodes defined for the connected component
3084        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3085   PetscFunctionBegin;
3086   n = 0;
3087   constraints_index = 0;
3088   if (pcbddc->ConstraintMatrix) {
3089     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3090     max_size_of_constraint = 0;
3091     for (i=0;i<local_primal_size;i++) {
3092       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3093       if (size_of_constraint > 1) {
3094         n++;
3095       }
3096       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3097       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3098     }
3099     if (constraints_idx) {
3100       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3101       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3102       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3103       n = 0;
3104       for (i=0;i<local_primal_size;i++) {
3105         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3106         if (size_of_constraint > 1) {
3107           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3108           /* find first untouched local node */
3109           j = 0;
3110           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3111           min_index = row_cmat_global_indices[j];
3112           min_loc = j;
3113           /* search the minimum among nodes not yet touched on the connected component
3114              since there can be more than one constraint on a single cc */
3115           for (j=1;j<size_of_constraint;j++) {
3116             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3117               min_index = row_cmat_global_indices[j];
3118               min_loc = j;
3119             }
3120           }
3121           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3122           constraints_index[n++] = row_cmat_indices[min_loc];
3123         }
3124         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3125       }
3126       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3127       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3128     }
3129   }
3130   *n_constraints = n;
3131   if (constraints_idx) *constraints_idx = constraints_index;
3132   PetscFunctionReturn(0);
3133 }
3134 
3135 #undef __FUNCT__
3136 #define __FUNCT__ "PCBDDCSubsetNumbering"
3137 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3138 {
3139   Vec            local_vec,global_vec;
3140   IS             seqis,paris;
3141   VecScatter     scatter_ctx;
3142   PetscScalar    *array;
3143   PetscInt       *temp_global_dofs;
3144   PetscScalar    globalsum;
3145   PetscInt       i,j,s;
3146   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3147   PetscMPIInt    rank_prec_comm,size_prec_comm;
3148   PetscInt       *dof_sizes,*dof_displs;
3149   PetscBool      first_found;
3150   PetscErrorCode ierr;
3151 
3152   PetscFunctionBegin;
3153   /* mpi buffers */
3154   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3155   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3156   j = ( !rank_prec_comm ? size_prec_comm : 0);
3157   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3158   /* get maximum size of subset */
3159   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3160   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3161   max_local = 0;
3162   for (i=0;i<n_local_dofs;i++) {
3163     if (max_local < temp_global_dofs[i] ) {
3164       max_local = temp_global_dofs[i];
3165     }
3166   }
3167   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3168   max_global++;
3169   max_local = 0;
3170   for (i=0;i<n_local_dofs;i++) {
3171     if (max_local < local_dofs[i] ) {
3172       max_local = local_dofs[i];
3173     }
3174   }
3175   max_local++;
3176   /* allocate workspace */
3177   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3178   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3179   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3180   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3181   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3182   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3183   /* create scatter */
3184   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3185   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3186   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3187   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3188   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3189   /* init array */
3190   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3191   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3192   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3193   if (local_dofs_mult) {
3194     for (i=0;i<n_local_dofs;i++) {
3195       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3196     }
3197   } else {
3198     for (i=0;i<n_local_dofs;i++) {
3199       array[local_dofs[i]]=1.0;
3200     }
3201   }
3202   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3203   /* scatter into global vec and get total number of global dofs */
3204   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3205   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3206   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3207   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3208   /* Fill global_vec with cumulative function for global numbering */
3209   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3210   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3211   nlocals = 0;
3212   first_index = -1;
3213   first_found = PETSC_FALSE;
3214   for (i=0;i<s;i++) {
3215     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3216       first_found = PETSC_TRUE;
3217       first_index = i;
3218     }
3219     nlocals += (PetscInt)PetscRealPart(array[i]);
3220   }
3221   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3222   if (!rank_prec_comm) {
3223     dof_displs[0]=0;
3224     for (i=1;i<size_prec_comm;i++) {
3225       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3226     }
3227   }
3228   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3229   if (first_found) {
3230     array[first_index] += (PetscScalar)nlocals;
3231     old_index = first_index;
3232     for (i=first_index+1;i<s;i++) {
3233       if (PetscRealPart(array[i]) > 0.1) {
3234         array[i] += array[old_index];
3235         old_index = i;
3236       }
3237     }
3238   }
3239   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3240   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3241   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3242   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3243   /* get global ordering of local dofs */
3244   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3245   if (local_dofs_mult) {
3246     for (i=0;i<n_local_dofs;i++) {
3247       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3248     }
3249   } else {
3250     for (i=0;i<n_local_dofs;i++) {
3251       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3252     }
3253   }
3254   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3255   /* free workspace */
3256   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3257   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3258   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3259   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3260   /* return pointer to global ordering of local dofs */
3261   *global_numbering_subset = temp_global_dofs;
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 #undef __FUNCT__
3266 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3267 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3268 {
3269   PetscInt       i,j;
3270   PetscScalar    *alphas;
3271   PetscErrorCode ierr;
3272 
3273   PetscFunctionBegin;
3274   /* this implements stabilized Gram-Schmidt */
3275   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3276   for (i=0;i<n;i++) {
3277     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3278     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3279     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3280   }
3281   ierr = PetscFree(alphas);CHKERRQ(ierr);
3282   PetscFunctionReturn(0);
3283 }
3284 
3285 #undef __FUNCT__
3286 #define __FUNCT__ "MatISGetSubassemblingPattern"
3287 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3288 {
3289   Mat             subdomain_adj;
3290   IS              new_ranks,ranks_send_to;
3291   MatPartitioning partitioner;
3292   Mat_IS          *matis;
3293   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3294   PetscInt        prank;
3295   PetscMPIInt     size,rank,color;
3296   PetscInt        *xadj,*adjncy,*oldranks;
3297   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3298   PetscInt        i,local_size,threshold=0;
3299   PetscErrorCode  ierr;
3300   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3301   PetscSubcomm    subcomm;
3302 
3303   PetscFunctionBegin;
3304   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3305   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3306   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3307 
3308   /* Get info on mapping */
3309   matis = (Mat_IS*)(mat->data);
3310   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3311   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3312 
3313   /* build local CSR graph of subdomains' connectivity */
3314   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3315   xadj[0] = 0;
3316   xadj[1] = PetscMax(n_neighs-1,0);
3317   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3318   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3319 
3320   if (threshold) {
3321     PetscInt xadj_count = 0;
3322     for (i=1;i<n_neighs;i++) {
3323       if (n_shared[i] > threshold) {
3324         adjncy[xadj_count] = neighs[i];
3325         adjncy_wgt[xadj_count] = n_shared[i];
3326         xadj_count++;
3327       }
3328     }
3329     xadj[1] = xadj_count;
3330   } else {
3331     if (xadj[1]) {
3332       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3333       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3334     }
3335   }
3336   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3337   if (use_square) {
3338     for (i=0;i<xadj[1];i++) {
3339       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3340     }
3341   }
3342   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3343 
3344   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3345 
3346   /*
3347     Restrict work on active processes only.
3348   */
3349   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3350   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3351   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3352   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3353   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3354   if (color) {
3355     ierr = PetscFree(xadj);CHKERRQ(ierr);
3356     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3357     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3358   } else {
3359     PetscInt coarsening_ratio;
3360     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3361     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3362     prank = rank;
3363     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3364     /*
3365     for (i=0;i<size;i++) {
3366       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3367     }
3368     */
3369     for (i=0;i<xadj[1];i++) {
3370       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3371     }
3372     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3373     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3374     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3375 
3376     /* Partition */
3377     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3378     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3379     if (use_vwgt) {
3380       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3381       v_wgt[0] = local_size;
3382       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3383     }
3384     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3385     coarsening_ratio = size/n_subdomains;
3386     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3387     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3388     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3389     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3390 
3391     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3392     if (contiguous) {
3393       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3394     } else {
3395       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3396     }
3397     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3398     /* clean up */
3399     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3400     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3401     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3402     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3403   }
3404   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3405 
3406   /* assemble parallel IS for sends */
3407   i = 1;
3408   if (color) i=0;
3409   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3410 
3411   /* get back IS */
3412   *is_sends = ranks_send_to;
3413   PetscFunctionReturn(0);
3414 }
3415 
3416 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3417 
3418 #undef __FUNCT__
3419 #define __FUNCT__ "MatISSubassemble"
3420 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3421 {
3422   Mat                    local_mat;
3423   Mat_IS                 *matis;
3424   IS                     is_sends_internal;
3425   PetscInt               rows,cols,new_local_rows;
3426   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3427   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3428   ISLocalToGlobalMapping l2gmap;
3429   PetscInt*              l2gmap_indices;
3430   const PetscInt*        is_indices;
3431   MatType                new_local_type;
3432   /* buffers */
3433   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3434   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3435   PetscInt               *recv_buffer_idxs_local;
3436   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3437   /* MPI */
3438   MPI_Comm               comm,comm_n;
3439   PetscSubcomm           subcomm;
3440   PetscMPIInt            n_sends,n_recvs,commsize;
3441   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3442   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3443   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3444   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3445   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3446   PetscErrorCode         ierr;
3447 
3448   PetscFunctionBegin;
3449   /* TODO: add missing checks */
3450   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3451   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3452   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3453   PetscValidLogicalCollectiveInt(mat,nis,7);
3454   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3455   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3456   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3457   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3458   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3459   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3460   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3461   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3462     PetscInt mrows,mcols,mnrows,mncols;
3463     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3464     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3465     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3466     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3467     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3468     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3469   }
3470   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3471   PetscValidLogicalCollectiveInt(mat,bs,0);
3472   /* prepare IS for sending if not provided */
3473   if (!is_sends) {
3474     PetscBool pcontig = PETSC_TRUE;
3475     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3476     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3477   } else {
3478     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3479     is_sends_internal = is_sends;
3480   }
3481 
3482   /* get pointer of MATIS data */
3483   matis = (Mat_IS*)mat->data;
3484 
3485   /* get comm */
3486   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3487 
3488   /* compute number of sends */
3489   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3490   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3491 
3492   /* compute number of receives */
3493   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3494   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3495   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3496   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3497   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3498   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3499   ierr = PetscFree(iflags);CHKERRQ(ierr);
3500 
3501   /* restrict comm if requested */
3502   subcomm = 0;
3503   destroy_mat = PETSC_FALSE;
3504   if (restrict_comm) {
3505     PetscMPIInt color,subcommsize;
3506 
3507     color = 0;
3508     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3509     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3510     subcommsize = commsize - subcommsize;
3511     /* check if reuse has been requested */
3512     if (reuse == MAT_REUSE_MATRIX) {
3513       if (*mat_n) {
3514         PetscMPIInt subcommsize2;
3515         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3516         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3517         comm_n = PetscObjectComm((PetscObject)*mat_n);
3518       } else {
3519         comm_n = PETSC_COMM_SELF;
3520       }
3521     } else { /* MAT_INITIAL_MATRIX */
3522       PetscMPIInt rank;
3523 
3524       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3525       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3526       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3527       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3528       comm_n = PetscSubcommChild(subcomm);
3529     }
3530     /* flag to destroy *mat_n if not significative */
3531     if (color) destroy_mat = PETSC_TRUE;
3532   } else {
3533     comm_n = comm;
3534   }
3535 
3536   /* prepare send/receive buffers */
3537   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3538   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3539   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3540   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3541   if (nis) {
3542     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3543   }
3544 
3545   /* Get data from local matrices */
3546   if (!isdense) {
3547     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3548     /* TODO: See below some guidelines on how to prepare the local buffers */
3549     /*
3550        send_buffer_vals should contain the raw values of the local matrix
3551        send_buffer_idxs should contain:
3552        - MatType_PRIVATE type
3553        - PetscInt        size_of_l2gmap
3554        - PetscInt        global_row_indices[size_of_l2gmap]
3555        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3556     */
3557   } else {
3558     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3559     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3560     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3561     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3562     send_buffer_idxs[1] = i;
3563     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3564     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3565     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3566     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3567     for (i=0;i<n_sends;i++) {
3568       ilengths_vals[is_indices[i]] = len*len;
3569       ilengths_idxs[is_indices[i]] = len+2;
3570     }
3571   }
3572   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3573   /* additional is (if any) */
3574   if (nis) {
3575     PetscMPIInt psum;
3576     PetscInt j;
3577     for (j=0,psum=0;j<nis;j++) {
3578       PetscInt plen;
3579       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3580       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3581       psum += len+1; /* indices + lenght */
3582     }
3583     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3584     for (j=0,psum=0;j<nis;j++) {
3585       PetscInt plen;
3586       const PetscInt *is_array_idxs;
3587       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3588       send_buffer_idxs_is[psum] = plen;
3589       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3590       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3591       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3592       psum += plen+1; /* indices + lenght */
3593     }
3594     for (i=0;i<n_sends;i++) {
3595       ilengths_idxs_is[is_indices[i]] = psum;
3596     }
3597     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3598   }
3599 
3600   buf_size_idxs = 0;
3601   buf_size_vals = 0;
3602   buf_size_idxs_is = 0;
3603   for (i=0;i<n_recvs;i++) {
3604     buf_size_idxs += (PetscInt)olengths_idxs[i];
3605     buf_size_vals += (PetscInt)olengths_vals[i];
3606     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3607   }
3608   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3609   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3610   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3611 
3612   /* get new tags for clean communications */
3613   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3614   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3615   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3616 
3617   /* allocate for requests */
3618   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3619   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3620   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3621   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3622   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3623   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3624 
3625   /* communications */
3626   ptr_idxs = recv_buffer_idxs;
3627   ptr_vals = recv_buffer_vals;
3628   ptr_idxs_is = recv_buffer_idxs_is;
3629   for (i=0;i<n_recvs;i++) {
3630     source_dest = onodes[i];
3631     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3632     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3633     ptr_idxs += olengths_idxs[i];
3634     ptr_vals += olengths_vals[i];
3635     if (nis) {
3636       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3637       ptr_idxs_is += olengths_idxs_is[i];
3638     }
3639   }
3640   for (i=0;i<n_sends;i++) {
3641     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3642     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3643     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3644     if (nis) {
3645       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3646     }
3647   }
3648   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3649   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3650 
3651   /* assemble new l2g map */
3652   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3653   ptr_idxs = recv_buffer_idxs;
3654   new_local_rows = 0;
3655   for (i=0;i<n_recvs;i++) {
3656     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3657     ptr_idxs += olengths_idxs[i];
3658   }
3659   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3660   ptr_idxs = recv_buffer_idxs;
3661   new_local_rows = 0;
3662   for (i=0;i<n_recvs;i++) {
3663     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3664     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3665     ptr_idxs += olengths_idxs[i];
3666   }
3667   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3668   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3669   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3670 
3671   /* infer new local matrix type from received local matrices type */
3672   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3673   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3674   if (n_recvs) {
3675     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3676     ptr_idxs = recv_buffer_idxs;
3677     for (i=0;i<n_recvs;i++) {
3678       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3679         new_local_type_private = MATAIJ_PRIVATE;
3680         break;
3681       }
3682       ptr_idxs += olengths_idxs[i];
3683     }
3684     switch (new_local_type_private) {
3685       case MATDENSE_PRIVATE:
3686         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3687           new_local_type = MATSEQAIJ;
3688           bs = 1;
3689         } else { /* if I receive only 1 dense matrix */
3690           new_local_type = MATSEQDENSE;
3691           bs = 1;
3692         }
3693         break;
3694       case MATAIJ_PRIVATE:
3695         new_local_type = MATSEQAIJ;
3696         bs = 1;
3697         break;
3698       case MATBAIJ_PRIVATE:
3699         new_local_type = MATSEQBAIJ;
3700         break;
3701       case MATSBAIJ_PRIVATE:
3702         new_local_type = MATSEQSBAIJ;
3703         break;
3704       default:
3705         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3706         break;
3707     }
3708   } else { /* by default, new_local_type is seqdense */
3709     new_local_type = MATSEQDENSE;
3710     bs = 1;
3711   }
3712 
3713   /* create MATIS object if needed */
3714   if (reuse == MAT_INITIAL_MATRIX) {
3715     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3716     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3717   } else {
3718     /* it also destroys the local matrices */
3719     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3720   }
3721   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3722   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3723 
3724   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3725 
3726   /* Global to local map of received indices */
3727   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3728   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3729   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3730 
3731   /* restore attributes -> type of incoming data and its size */
3732   buf_size_idxs = 0;
3733   for (i=0;i<n_recvs;i++) {
3734     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3735     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3736     buf_size_idxs += (PetscInt)olengths_idxs[i];
3737   }
3738   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3739 
3740   /* set preallocation */
3741   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3742   if (!newisdense) {
3743     PetscInt *new_local_nnz=0;
3744 
3745     ptr_vals = recv_buffer_vals;
3746     ptr_idxs = recv_buffer_idxs_local;
3747     if (n_recvs) {
3748       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3749     }
3750     for (i=0;i<n_recvs;i++) {
3751       PetscInt j;
3752       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3753         for (j=0;j<*(ptr_idxs+1);j++) {
3754           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3755         }
3756       } else {
3757         /* TODO */
3758       }
3759       ptr_idxs += olengths_idxs[i];
3760     }
3761     if (new_local_nnz) {
3762       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3763       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3764       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3765       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3766       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3767       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3768     } else {
3769       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3770     }
3771     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3772   } else {
3773     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3774   }
3775 
3776   /* set values */
3777   ptr_vals = recv_buffer_vals;
3778   ptr_idxs = recv_buffer_idxs_local;
3779   for (i=0;i<n_recvs;i++) {
3780     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3781       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3782       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3783       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3784       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3785       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3786     } else {
3787       /* TODO */
3788     }
3789     ptr_idxs += olengths_idxs[i];
3790     ptr_vals += olengths_vals[i];
3791   }
3792   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3793   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3794   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3795   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3796   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3797   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3798 
3799 #if 0
3800   if (!restrict_comm) { /* check */
3801     Vec       lvec,rvec;
3802     PetscReal infty_error;
3803 
3804     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3805     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3806     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3807     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3808     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3809     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3810     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3811     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3812     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3813   }
3814 #endif
3815 
3816   /* assemble new additional is (if any) */
3817   if (nis) {
3818     PetscInt **temp_idxs,*count_is,j,psum;
3819 
3820     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3821     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3822     ptr_idxs = recv_buffer_idxs_is;
3823     psum = 0;
3824     for (i=0;i<n_recvs;i++) {
3825       for (j=0;j<nis;j++) {
3826         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3827         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3828         psum += plen;
3829         ptr_idxs += plen+1; /* shift pointer to received data */
3830       }
3831     }
3832     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3833     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3834     for (i=1;i<nis;i++) {
3835       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3836     }
3837     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3838     ptr_idxs = recv_buffer_idxs_is;
3839     for (i=0;i<n_recvs;i++) {
3840       for (j=0;j<nis;j++) {
3841         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3842         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3843         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3844         ptr_idxs += plen+1; /* shift pointer to received data */
3845       }
3846     }
3847     for (i=0;i<nis;i++) {
3848       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3849       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3850       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3851     }
3852     ierr = PetscFree(count_is);CHKERRQ(ierr);
3853     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3854     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3855   }
3856   /* free workspace */
3857   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3858   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3859   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3860   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3861   if (isdense) {
3862     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3863     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3864   } else {
3865     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3866   }
3867   if (nis) {
3868     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3869     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3870   }
3871   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3872   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3873   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3874   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3875   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3876   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3877   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3878   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3879   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3880   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3881   ierr = PetscFree(onodes);CHKERRQ(ierr);
3882   if (nis) {
3883     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3884     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3885     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3886   }
3887   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3888   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3889     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3890     for (i=0;i<nis;i++) {
3891       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3892     }
3893   }
3894   PetscFunctionReturn(0);
3895 }
3896 
3897 /* temporary hack into ksp private data structure */
3898 #include <petsc/private/kspimpl.h>
3899 
3900 #undef __FUNCT__
3901 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3902 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3903 {
3904   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3905   PC_IS                  *pcis = (PC_IS*)pc->data;
3906   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3907   MatNullSpace           CoarseNullSpace=NULL;
3908   ISLocalToGlobalMapping coarse_islg;
3909   IS                     coarse_is,*isarray;
3910   PetscInt               i,im_active=-1,active_procs=-1;
3911   PetscInt               nis,nisdofs,nisneu;
3912   PC                     pc_temp;
3913   PCType                 coarse_pc_type;
3914   KSPType                coarse_ksp_type;
3915   PetscBool              multilevel_requested,multilevel_allowed;
3916   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3917   Mat                    t_coarse_mat_is;
3918   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3919   PetscMPIInt            all_procs;
3920   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3921   PetscBool              compute_vecs = PETSC_FALSE;
3922   PetscScalar            *array;
3923   PetscErrorCode         ierr;
3924 
3925   PetscFunctionBegin;
3926   /* Assign global numbering to coarse dofs */
3927   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3928     PetscInt ocoarse_size;
3929     compute_vecs = PETSC_TRUE;
3930     ocoarse_size = pcbddc->coarse_size;
3931     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3932     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3933     /* see if we can avoid some work */
3934     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3935       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3936         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3937         coarse_reuse = PETSC_FALSE;
3938       } else { /* we can safely reuse already computed coarse matrix */
3939         coarse_reuse = PETSC_TRUE;
3940       }
3941     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3942       coarse_reuse = PETSC_FALSE;
3943     }
3944     /* reset any subassembling information */
3945     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3946     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3947   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3948     coarse_reuse = PETSC_TRUE;
3949   }
3950 
3951   /* count "active" (i.e. with positive local size) and "void" processes */
3952   im_active = !!(pcis->n);
3953   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3954   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3955   void_procs = all_procs-active_procs;
3956   csin_type_simple = PETSC_TRUE;
3957   redist = PETSC_FALSE;
3958   if (pcbddc->current_level && void_procs) {
3959     csin_ml = PETSC_TRUE;
3960     ncoarse_ml = void_procs;
3961     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3962     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3963       csin_ds = PETSC_TRUE;
3964       ncoarse_ds = pcbddc->redistribute_coarse;
3965       redist = PETSC_TRUE;
3966     } else {
3967       csin_ds = PETSC_TRUE;
3968       ncoarse_ds = active_procs;
3969       redist = PETSC_TRUE;
3970     }
3971   } else {
3972     csin_ml = PETSC_FALSE;
3973     ncoarse_ml = all_procs;
3974     if (void_procs) {
3975       csin_ds = PETSC_TRUE;
3976       ncoarse_ds = void_procs;
3977       csin_type_simple = PETSC_FALSE;
3978     } else {
3979       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3980         csin_ds = PETSC_TRUE;
3981         ncoarse_ds = pcbddc->redistribute_coarse;
3982         redist = PETSC_TRUE;
3983       } else {
3984         csin_ds = PETSC_FALSE;
3985         ncoarse_ds = all_procs;
3986       }
3987     }
3988   }
3989 
3990   /*
3991     test if we can go multilevel: three conditions must be satisfied:
3992     - we have not exceeded the number of levels requested
3993     - we can actually subassemble the active processes
3994     - we can find a suitable number of MPI processes where we can place the subassembled problem
3995   */
3996   multilevel_allowed = PETSC_FALSE;
3997   multilevel_requested = PETSC_FALSE;
3998   if (pcbddc->current_level < pcbddc->max_levels) {
3999     multilevel_requested = PETSC_TRUE;
4000     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4001       multilevel_allowed = PETSC_FALSE;
4002     } else {
4003       multilevel_allowed = PETSC_TRUE;
4004     }
4005   }
4006   /* determine number of process partecipating to coarse solver */
4007   if (multilevel_allowed) {
4008     ncoarse = ncoarse_ml;
4009     csin = csin_ml;
4010     redist = PETSC_FALSE;
4011   } else {
4012     ncoarse = ncoarse_ds;
4013     csin = csin_ds;
4014   }
4015 
4016   /* creates temporary l2gmap and IS for coarse indexes */
4017   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4018   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4019 
4020   /* creates temporary MATIS object for coarse matrix */
4021   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4022   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4023   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4024   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4025 #if 0
4026   {
4027     PetscViewer viewer;
4028     char filename[256];
4029     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4030     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4031     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4032     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4033     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4034   }
4035 #endif
4036   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4037   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4038   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4039   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4040   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4041 
4042   /* compute dofs splitting and neumann boundaries for coarse dofs */
4043   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4044     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4045     const PetscInt         *idxs;
4046     ISLocalToGlobalMapping tmap;
4047 
4048     /* create map between primal indices (in local representative ordering) and local primal numbering */
4049     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4050     /* allocate space for temporary storage */
4051     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4052     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4053     /* allocate for IS array */
4054     nisdofs = pcbddc->n_ISForDofsLocal;
4055     nisneu = !!pcbddc->NeumannBoundariesLocal;
4056     nis = nisdofs + nisneu;
4057     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4058     /* dofs splitting */
4059     for (i=0;i<nisdofs;i++) {
4060       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4061       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4062       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4063       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4064       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4065       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4066       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4067       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4068     }
4069     /* neumann boundaries */
4070     if (pcbddc->NeumannBoundariesLocal) {
4071       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4072       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4073       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4074       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4075       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4076       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4077       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4078       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4079     }
4080     /* free memory */
4081     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4082     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4083     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4084   } else {
4085     nis = 0;
4086     nisdofs = 0;
4087     nisneu = 0;
4088     isarray = NULL;
4089   }
4090   /* destroy no longer needed map */
4091   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4092 
4093   /* restrict on coarse candidates (if needed) */
4094   coarse_mat_is = NULL;
4095   if (csin) {
4096     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4097       if (redist) {
4098         PetscMPIInt rank;
4099         PetscInt    spc,n_spc_p1,dest[1],destsize;
4100 
4101         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4102         spc = active_procs/ncoarse;
4103         n_spc_p1 = active_procs%ncoarse;
4104         if (im_active) {
4105           destsize = 1;
4106           if (rank > n_spc_p1*(spc+1)-1) {
4107             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4108           } else {
4109             dest[0] = rank/(spc+1);
4110           }
4111         } else {
4112           destsize = 0;
4113         }
4114         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4115       } else if (csin_type_simple) {
4116         PetscMPIInt rank;
4117         PetscInt    issize,isidx;
4118 
4119         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4120         if (im_active) {
4121           issize = 1;
4122           isidx = (PetscInt)rank;
4123         } else {
4124           issize = 0;
4125           isidx = -1;
4126         }
4127         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4128       } else { /* get a suitable subassembling pattern from MATIS code */
4129         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4130       }
4131 
4132       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4133       if (!redist || ncoarse <= void_procs) {
4134         PetscInt ncoarse_cand,tissize,*nisindices;
4135         PetscInt *coarse_candidates;
4136         const PetscInt* tisindices;
4137 
4138         /* get coarse candidates' ranks in pc communicator */
4139         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4140         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4141         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4142           if (!coarse_candidates[i]) {
4143             coarse_candidates[ncoarse_cand++]=i;
4144           }
4145         }
4146         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4147 
4148 
4149         if (pcbddc->dbg_flag) {
4150           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4151           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4152           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4153           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4154           for (i=0;i<ncoarse_cand;i++) {
4155             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4156           }
4157           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4158           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4159         }
4160         /* shift the pattern on coarse candidates */
4161         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4162         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4163         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4164         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4165         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4166         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4167         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4168       }
4169       if (pcbddc->dbg_flag) {
4170         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4171         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4172         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4173         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4174       }
4175     }
4176     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4177     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4178   } else {
4179     if (pcbddc->dbg_flag) {
4180       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4181       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4182       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4183     }
4184     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4185     coarse_mat_is = t_coarse_mat_is;
4186   }
4187 
4188   /* create local to global scatters for coarse problem */
4189   if (compute_vecs) {
4190     PetscInt lrows;
4191     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4192     if (coarse_mat_is) {
4193       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4194     } else {
4195       lrows = 0;
4196     }
4197     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4198     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4199     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4200     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4201     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4202   }
4203   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4204   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4205 
4206   /* set defaults for coarse KSP and PC */
4207   if (multilevel_allowed) {
4208     coarse_ksp_type = KSPRICHARDSON;
4209     coarse_pc_type = PCBDDC;
4210   } else {
4211     coarse_ksp_type = KSPPREONLY;
4212     coarse_pc_type = PCREDUNDANT;
4213   }
4214 
4215   /* print some info if requested */
4216   if (pcbddc->dbg_flag) {
4217     if (!multilevel_allowed) {
4218       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4219       if (multilevel_requested) {
4220         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4221       } else if (pcbddc->max_levels) {
4222         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4223       }
4224       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4225     }
4226   }
4227 
4228   /* create the coarse KSP object only once with defaults */
4229   if (coarse_mat_is) {
4230     MatReuse coarse_mat_reuse;
4231     PetscViewer dbg_viewer = NULL;
4232     if (pcbddc->dbg_flag) {
4233       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4234       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4235     }
4236     if (!pcbddc->coarse_ksp) {
4237       char prefix[256],str_level[16];
4238       size_t len;
4239       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4240       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4241       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4242       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4243       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4244       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4245       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4246       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4247       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4248       /* prefix */
4249       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4250       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4251       if (!pcbddc->current_level) {
4252         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4253         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4254       } else {
4255         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4256         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4257         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4258         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4259         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4260         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4261       }
4262       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4263       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4264       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4265       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4266       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4267       /* allow user customization */
4268       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4269     }
4270     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4271     if (nisdofs) {
4272       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4273       for (i=0;i<nisdofs;i++) {
4274         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4275       }
4276     }
4277     if (nisneu) {
4278       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4279       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4280     }
4281 
4282     /* get some info after set from options */
4283     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4284     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4285     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4286     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4287     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4288       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4289       isbddc = PETSC_FALSE;
4290     }
4291     if (isredundant) {
4292       KSP inner_ksp;
4293       PC inner_pc;
4294       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4295       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4296       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4297     }
4298 
4299     /* assemble coarse matrix */
4300     if (coarse_reuse) {
4301       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4302       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4303       coarse_mat_reuse = MAT_REUSE_MATRIX;
4304     } else {
4305       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4306     }
4307     if (isbddc || isnn) {
4308       if (pcbddc->coarsening_ratio > 1) {
4309         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4310           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4311           if (pcbddc->dbg_flag) {
4312             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4313             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4314             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4315             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4316           }
4317         }
4318         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4319       } else {
4320         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4321         coarse_mat = coarse_mat_is;
4322       }
4323     } else {
4324       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4325     }
4326     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4327 
4328     /* propagate symmetry info to coarse matrix */
4329     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4330     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4331 
4332     /* set operators */
4333     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4334     if (pcbddc->dbg_flag) {
4335       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4336     }
4337   } else { /* processes non partecipating to coarse solver (if any) */
4338     coarse_mat = 0;
4339   }
4340   ierr = PetscFree(isarray);CHKERRQ(ierr);
4341 #if 0
4342   {
4343     PetscViewer viewer;
4344     char filename[256];
4345     sprintf(filename,"coarse_mat.m");
4346     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4347     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4348     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4349     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4350   }
4351 #endif
4352 
4353   /* Compute coarse null space (special handling by BDDC only) */
4354   if (pcbddc->NullSpace) {
4355     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4356   }
4357 
4358   if (pcbddc->coarse_ksp) {
4359     Vec crhs,csol;
4360     PetscBool ispreonly;
4361     if (CoarseNullSpace) {
4362       if (isbddc) {
4363         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4364       } else {
4365         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4366       }
4367     }
4368     /* setup coarse ksp */
4369     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4370     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4371     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4372     /* hack */
4373     if (!csol) {
4374       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4375     }
4376     if (!crhs) {
4377       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4378     }
4379     /* Check coarse problem if in debug mode or if solving with an iterative method */
4380     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4381     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4382       KSP       check_ksp;
4383       KSPType   check_ksp_type;
4384       PC        check_pc;
4385       Vec       check_vec,coarse_vec;
4386       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4387       PetscInt  its;
4388       PetscBool compute_eigs;
4389       PetscReal *eigs_r,*eigs_c;
4390       PetscInt  neigs;
4391       const char *prefix;
4392 
4393       /* Create ksp object suitable for estimation of extreme eigenvalues */
4394       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4395       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4396       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4397       if (ispreonly) {
4398         check_ksp_type = KSPPREONLY;
4399         compute_eigs = PETSC_FALSE;
4400       } else {
4401         check_ksp_type = KSPGMRES;
4402         compute_eigs = PETSC_TRUE;
4403       }
4404       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4405       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4406       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4407       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4408       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4409       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4410       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4411       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4412       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4413       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4414       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4415       /* create random vec */
4416       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4417       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4418       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4419       if (CoarseNullSpace) {
4420         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4421       }
4422       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4423       /* solve coarse problem */
4424       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4425       if (CoarseNullSpace) {
4426         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4427       }
4428       /* set eigenvalue estimation if preonly has not been requested */
4429       if (compute_eigs) {
4430         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4431         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4432         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4433         lambda_max = eigs_r[neigs-1];
4434         lambda_min = eigs_r[0];
4435         if (pcbddc->use_coarse_estimates) {
4436           if (lambda_max>lambda_min) {
4437             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4438             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4439           }
4440         }
4441       }
4442 
4443       /* check coarse problem residual error */
4444       if (pcbddc->dbg_flag) {
4445         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4446         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4447         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4448         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4449         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4450         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4451         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4452         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4453         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4454         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4455         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4456         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4457         if (compute_eigs) {
4458           PetscReal lambda_max_s,lambda_min_s;
4459           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4460           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4461           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4462           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4463           for (i=0;i<neigs;i++) {
4464             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4465           }
4466         }
4467         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4468         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4469       }
4470       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4471       if (compute_eigs) {
4472         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4473         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4474       }
4475     }
4476   }
4477   /* print additional info */
4478   if (pcbddc->dbg_flag) {
4479     /* waits until all processes reaches this point */
4480     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4481     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4482     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4483   }
4484 
4485   /* free memory */
4486   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4487   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4488   PetscFunctionReturn(0);
4489 }
4490 
4491 #undef __FUNCT__
4492 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4493 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4494 {
4495   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4496   PC_IS*         pcis = (PC_IS*)pc->data;
4497   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4498   PetscInt       i,coarse_size=0;
4499   PetscInt       *local_primal_indices=NULL;
4500   PetscErrorCode ierr;
4501 
4502   PetscFunctionBegin;
4503   /* Compute global number of coarse dofs */
4504   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4505     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4506   }
4507   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4508 
4509   /* check numbering */
4510   if (pcbddc->dbg_flag) {
4511     PetscScalar coarsesum,*array;
4512     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4513 
4514     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4515     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4516     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4517     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4518     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4519     for (i=0;i<pcbddc->local_primal_size;i++) {
4520       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4521     }
4522     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4523     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4524     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4525     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4526     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4527     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4528     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4529     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4530     for (i=0;i<pcis->n;i++) {
4531       if (array[i] == 1.0) {
4532         set_error = PETSC_TRUE;
4533         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4534       }
4535     }
4536     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4537     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4538     for (i=0;i<pcis->n;i++) {
4539       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4540     }
4541     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4542     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4543     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4544     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4545     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4546     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4547     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4548       PetscInt *gidxs;
4549 
4550       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4551       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4552       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4553       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4554       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4555       for (i=0;i<pcbddc->local_primal_size;i++) {
4556         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4557       }
4558       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4559       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4560     }
4561     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4562     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4563   }
4564   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4565   /* get back data */
4566   *coarse_size_n = coarse_size;
4567   *local_primal_indices_n = local_primal_indices;
4568   PetscFunctionReturn(0);
4569 }
4570 
4571 #undef __FUNCT__
4572 #define __FUNCT__ "PCBDDCGlobalToLocal"
4573 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4574 {
4575   IS             localis_t;
4576   PetscInt       i,lsize,*idxs,n;
4577   PetscScalar    *vals;
4578   PetscErrorCode ierr;
4579 
4580   PetscFunctionBegin;
4581   /* get indices in local ordering exploiting local to global map */
4582   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4583   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4584   for (i=0;i<lsize;i++) vals[i] = 1.0;
4585   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4586   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4587   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4588   if (idxs) { /* multilevel guard */
4589     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4590   }
4591   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4592   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4593   ierr = PetscFree(vals);CHKERRQ(ierr);
4594   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4595   /* now compute set in local ordering */
4596   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4597   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4598   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4599   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4600   for (i=0,lsize=0;i<n;i++) {
4601     if (PetscRealPart(vals[i]) > 0.5) {
4602       lsize++;
4603     }
4604   }
4605   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4606   for (i=0,lsize=0;i<n;i++) {
4607     if (PetscRealPart(vals[i]) > 0.5) {
4608       idxs[lsize++] = i;
4609     }
4610   }
4611   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4612   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4613   *localis = localis_t;
4614   PetscFunctionReturn(0);
4615 }
4616 
4617 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4618 #undef __FUNCT__
4619 #define __FUNCT__ "PCBDDCMatMult_Private"
4620 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4621 {
4622   PCBDDCChange_ctx change_ctx;
4623   PetscErrorCode   ierr;
4624 
4625   PetscFunctionBegin;
4626   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4627   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4628   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4629   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4630   PetscFunctionReturn(0);
4631 }
4632 
4633 #undef __FUNCT__
4634 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4635 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4636 {
4637   PCBDDCChange_ctx change_ctx;
4638   PetscErrorCode   ierr;
4639 
4640   PetscFunctionBegin;
4641   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4642   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4643   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4644   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4645   PetscFunctionReturn(0);
4646 }
4647 
4648 #undef __FUNCT__
4649 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4650 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4651 {
4652   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4653   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4654   PetscInt            *used_xadj,*used_adjncy;
4655   PetscBool           free_used_adj;
4656   PetscErrorCode      ierr;
4657 
4658   PetscFunctionBegin;
4659   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4660   free_used_adj = PETSC_FALSE;
4661   if (pcbddc->sub_schurs_layers == -1) {
4662     used_xadj = NULL;
4663     used_adjncy = NULL;
4664   } else {
4665     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4666       used_xadj = pcbddc->mat_graph->xadj;
4667       used_adjncy = pcbddc->mat_graph->adjncy;
4668     } else if (pcbddc->computed_rowadj) {
4669       used_xadj = pcbddc->mat_graph->xadj;
4670       used_adjncy = pcbddc->mat_graph->adjncy;
4671     } else {
4672       PetscBool      flg_row=PETSC_FALSE;
4673       const PetscInt *xadj,*adjncy;
4674       PetscInt       nvtxs;
4675 
4676       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4677       if (flg_row) {
4678         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4679         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4680         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4681         free_used_adj = PETSC_TRUE;
4682       } else {
4683         pcbddc->sub_schurs_layers = -1;
4684         used_xadj = NULL;
4685         used_adjncy = NULL;
4686       }
4687       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4688     }
4689   }
4690   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4691 
4692   /* free adjacency */
4693   if (free_used_adj) {
4694     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4695   }
4696   PetscFunctionReturn(0);
4697 }
4698 
4699 #undef __FUNCT__
4700 #define __FUNCT__ "PCBDDCInitSubSchurs"
4701 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4702 {
4703   PC_IS               *pcis=(PC_IS*)pc->data;
4704   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4705   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4706   PCBDDCGraph         graph;
4707   Mat                 S_j;
4708   PetscErrorCode      ierr;
4709 
4710   PetscFunctionBegin;
4711   /* attach interface graph for determining subsets */
4712   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4713     IS verticesIS;
4714 
4715     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4716     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4717     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4718     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4719     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4720     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4721 /*
4722     if (pcbddc->dbg_flag) {
4723       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4724     }
4725 */
4726   } else {
4727     graph = pcbddc->mat_graph;
4728   }
4729 
4730   /* Create Schur complement matrix */
4731   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4732   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4733 
4734   /* sub_schurs init */
4735   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4736   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4737   /* free graph struct */
4738   if (pcbddc->sub_schurs_rebuild) {
4739     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4740   }
4741   PetscFunctionReturn(0);
4742 }
4743