xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision d5574798c8464fdb92aede3bf97f4457bb71bbdc)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
60   nmax = PetscMax(nmin,nmax);
61 
62   /* allocate lapack workspace */
63   cum = cum2 = 0;
64   maxneigs = 0;
65   for (i=0;i<sub_schurs->n_subs;i++) {
66     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
67       PetscInt n,subset_size;
68       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
69       n = PetscMin(subset_size,nmax);
70       cum += subset_size*n;
71       cum2 += n;
72       maxneigs = PetscMax(maxneigs,n);
73     }
74   }
75 
76   if (mss) {
77     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
78       PetscBLASInt B_itype = 1;
79       PetscBLASInt B_N = mss;
80       PetscReal    zero = 0.0;
81       PetscReal    eps = 0.0; /* dlamch? */
82 
83       B_lwork = -1;
84       S = NULL;
85       St = NULL;
86       eigs = NULL;
87       eigv = NULL;
88       B_iwork = NULL;
89       B_ifail = NULL;
90       thresh = 1.0;
91       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
92 #if defined(PETSC_USE_COMPLEX)
93       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
94 #else
95       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
96 #endif
97       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
98       ierr = PetscFPTrapPop();CHKERRQ(ierr);
99     } else {
100         /* TODO */
101     }
102   } else {
103     lwork = 0;
104   }
105 
106   nv = 0;
107   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */
108     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
109   }
110   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
111   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
112                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
113 #if defined(PETSC_USE_COMPLEX)
114   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
115 #endif
116   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
117   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
118                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
119                       nv+cum,&pcbddc->adaptive_constraints_idxs,
120                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
121   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
122 
123   maxneigs = 0;
124   cum = cum2 = cumarray = 0;
125   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
126     const PetscInt *idxs;
127 
128     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
129     for (cum=0;cum<nv;cum++) {
130       pcbddc->adaptive_constraints_n[cum] = 1;
131       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
132       pcbddc->adaptive_constraints_ptrs[cum] = cum;
133       pcbddc->adaptive_constraints_data[cum] = 1.0;
134     }
135     cum2 = cum;
136     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
137   }
138 
139   if (mss) { /* multilevel */
140     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
141     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
142   }
143 
144   for (i=0;i<sub_schurs->n_subs;i++) {
145     PetscInt j,subset_size;
146 
147     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
148     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
149       const PetscInt *idxs;
150       PetscScalar    one = 1.0,scalar_zero = 0.0;
151       PetscReal      zero=0.0;
152       PetscBLASInt   B_N;
153 
154       /* S should be copied since we need it for deluxe scaling */
155       if (sub_schurs->is_hermitian) {
156         PetscInt j;
157         for (j=0;j<subset_size;j++) {
158           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
159         }
160         for (j=0;j<subset_size;j++) {
161           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
162         }
163       } else {
164         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
165         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
166       }
167       /* is always this the right matrix? */
168       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
169 
170       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
171       /* St = Starray+cumarray; */
172       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
173       if (pcbddc->adaptive_threshold > 1.0) { /* this is an heuristic for edges */
174         thresh = (1.*pcbddc->mat_graph->count[idxs[0]])/(pcbddc->adaptive_threshold);
175       } else {
176         thresh = 1.0;
177       }
178 
179       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
180         PetscBLASInt B_itype = 1;
181         PetscBLASInt B_IL = 1, B_IU;
182         PetscReal    eps = -1.0; /* dlamch? */
183         PetscInt     nmin_s;
184 
185         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
186         /* ask for eigenvalues lower than thresh */
187         if (pcbddc->dbg_flag) {
188           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
189         }
190         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
191 #if defined(PETSC_USE_COMPLEX)
192         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
193 #else
194         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
195 #endif
196         ierr = PetscFPTrapPop();CHKERRQ(ierr);
197         if (B_ierr) {
198           if (B_ierr < 0 ) {
199             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
200           } else if (B_ierr <= B_N) {
201             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
202           } else {
203             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
204           }
205         }
206 
207         if (B_neigs > nmax) {
208           if (pcbddc->dbg_flag) {
209             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
210           }
211           B_neigs = nmax;
212         }
213 
214         nmin_s = PetscMin(nmin,B_N);
215         if (B_neigs < nmin_s) {
216           PetscBLASInt B_neigs2;
217 
218           B_IL = B_neigs + 1;
219           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
220           if (pcbddc->dbg_flag) {
221             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
222           }
223           if (sub_schurs->is_hermitian) {
224             PetscInt j;
225             for (j=0;j<subset_size;j++) {
226               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
227             }
228             for (j=0;j<subset_size;j++) {
229               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
230             }
231           } else {
232             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
233             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
234           }
235           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
236 #if defined(PETSC_USE_COMPLEX)
237           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
238 #else
239           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
240 #endif
241           ierr = PetscFPTrapPop();CHKERRQ(ierr);
242           B_neigs += B_neigs2;
243         }
244         if (B_ierr) {
245           if (B_ierr < 0 ) {
246             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
247           } else if (B_ierr <= B_N) {
248             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
249           } else {
250             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
251           }
252         }
253         if (pcbddc->dbg_flag) {
254           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
255           for (j=0;j<B_neigs;j++) {
256             if (eigs[j] == 0.0) {
257               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
258             } else {
259               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1.0/eigs[j]);
260             }
261           }
262         }
263       } else {
264           /* TODO */
265       }
266 
267       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
268       if (B_neigs) {
269         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N));
270       }
271       maxneigs = PetscMax(B_neigs,maxneigs);
272       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
273       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
274 
275       if (pcbddc->dbg_flag > 1) {
276         PetscInt ii;
277         for (ii=0;ii<B_neigs;ii++) {
278           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
279           for (j=0;j<B_N;j++) {
280             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]);
281           }
282         }
283       }
284       for (j=0;j<B_neigs;j++) {
285 #if 0
286         {
287           PetscBLASInt Blas_N,Blas_one = 1.0;
288           PetscScalar norm;
289           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
290           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
291           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
292             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
293           } else {
294             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
295           }
296           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
297         }
298 #endif
299         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
300         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
301         cum2 += subset_size;
302       }
303       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
304     }
305     /* shift for next computation */
306     cumarray += subset_size*subset_size;
307   }
308   if (pcbddc->dbg_flag) {
309     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
310   }
311   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
312   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
313 
314   if (mss) {
315     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
316     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
317   }
318   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
319 #if defined(PETSC_USE_COMPLEX)
320   ierr = PetscFree(rwork);CHKERRQ(ierr);
321 #endif
322   if (pcbddc->dbg_flag) {
323     PetscInt maxneigs_r;
324     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
325     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
326   }
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "PCBDDCSetUpSolvers"
332 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
333 {
334   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
335   PetscScalar    *coarse_submat_vals;
336   PetscErrorCode ierr;
337 
338   PetscFunctionBegin;
339   /* Setup local scatters R_to_B and (optionally) R_to_D */
340   /* PCBDDCSetUpLocalWorkVectors should be called first! */
341   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
342 
343   /* Setup local neumann solver ksp_R */
344   /* PCBDDCSetUpLocalScatters should be called first! */
345   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
346 
347   /* Change global null space passed in by the user if change of basis has been requested */
348   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
349     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
350   }
351 
352   /*
353      Setup local correction and local part of coarse basis.
354      Gives back the dense local part of the coarse matrix in column major ordering
355   */
356   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
357 
358   /* Compute total number of coarse nodes and setup coarse solver */
359   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
360 
361   /* free */
362   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
363   PetscFunctionReturn(0);
364 }
365 
366 #undef __FUNCT__
367 #define __FUNCT__ "PCBDDCResetCustomization"
368 PetscErrorCode PCBDDCResetCustomization(PC pc)
369 {
370   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
371   PetscErrorCode ierr;
372 
373   PetscFunctionBegin;
374   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
375   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
376   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
377   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
378   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
379   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
380   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
381   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
383   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
384   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "PCBDDCResetTopography"
390 PetscErrorCode PCBDDCResetTopography(PC pc)
391 {
392   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
393   PetscErrorCode ierr;
394 
395   PetscFunctionBegin;
396   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
397   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
398   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
399   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
400   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "PCBDDCResetSolvers"
406 PetscErrorCode PCBDDCResetSolvers(PC pc)
407 {
408   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
409   PetscScalar    *array;
410   PetscErrorCode ierr;
411 
412   PetscFunctionBegin;
413   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
414   if (pcbddc->coarse_phi_B) {
415     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
416     ierr = PetscFree(array);CHKERRQ(ierr);
417   }
418   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
419   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
420   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
421   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
422   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
423   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
424   if (pcbddc->local_auxmat2) {
425     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
426     ierr = PetscFree(array);CHKERRQ(ierr);
427   }
428   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
429   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
430   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
431   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
432   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
433   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
434   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
435   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
436   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
437   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
438   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
439   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
440   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
441   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
442   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
443   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
444   PetscFunctionReturn(0);
445 }
446 
447 #undef __FUNCT__
448 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
449 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
450 {
451   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
452   PC_IS          *pcis = (PC_IS*)pc->data;
453   VecType        impVecType;
454   PetscInt       n_constraints,n_R,old_size;
455   PetscErrorCode ierr;
456 
457   PetscFunctionBegin;
458   if (!pcbddc->ConstraintMatrix) {
459     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
460   }
461   /* get sizes */
462   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
463   n_R = pcis->n-pcbddc->n_actual_vertices;
464   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
465   /* local work vectors (try to avoid unneeded work)*/
466   /* R nodes */
467   old_size = -1;
468   if (pcbddc->vec1_R) {
469     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
470   }
471   if (n_R != old_size) {
472     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
473     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
474     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
475     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
476     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
477     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
478   }
479   /* local primal dofs */
480   old_size = -1;
481   if (pcbddc->vec1_P) {
482     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
483   }
484   if (pcbddc->local_primal_size != old_size) {
485     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
486     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
487     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
488     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
489   }
490   /* local explicit constraints */
491   old_size = -1;
492   if (pcbddc->vec1_C) {
493     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
494   }
495   if (n_constraints && n_constraints != old_size) {
496     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
497     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
498     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
499     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
500   }
501   PetscFunctionReturn(0);
502 }
503 
504 #undef __FUNCT__
505 #define __FUNCT__ "PCBDDCSetUpCorrection"
506 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
507 {
508   PetscErrorCode         ierr;
509   /* pointers to pcis and pcbddc */
510   PC_IS*                 pcis = (PC_IS*)pc->data;
511   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
512   /* submatrices of local problem */
513   Mat                    A_RV,A_VR,A_VV;
514   /* submatrices of local coarse problem */
515   Mat                    S_VV,S_CV,S_VC,S_CC;
516   /* working matrices */
517   Mat                    C_CR;
518   /* additional working stuff */
519   PC                     pc_R;
520   Mat                    F;
521   PetscBool              isLU,isCHOL,isILU;
522 
523   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
524   PetscScalar            *work;
525   PetscInt               *idx_V_B;
526   PetscInt               n,n_vertices,n_constraints;
527   PetscInt               i,n_R,n_D,n_B;
528   PetscBool              unsymmetric_check;
529   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
530   MatType                impMatType;
531   /* some shortcuts to scalars */
532   PetscScalar            one=1.0,m_one=-1.0;
533 
534   PetscFunctionBegin;
535   /* get number of vertices (corners plus constraints with change of basis)
536      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
537   n_vertices = pcbddc->n_actual_vertices;
538   n_constraints = pcbddc->local_primal_size-n_vertices;
539   /* Set Non-overlapping dimensions */
540   n_B = pcis->n_B; n_D = pcis->n - n_B;
541   n_R = pcis->n-n_vertices;
542 
543   /* Set types for local objects needed by BDDC precondtioner */
544   impMatType = MATSEQDENSE;
545 
546   /* vertices in boundary numbering */
547   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
548   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
549   if (i != n_vertices) {
550     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
551   }
552 
553   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
554   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
555   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
556   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
557   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
558   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
559   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
560   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
563 
564   unsymmetric_check = PETSC_FALSE;
565   /* allocate workspace */
566   n = 0;
567   if (n_constraints) {
568     n += n_R*n_constraints;
569   }
570   if (n_vertices) {
571     n = PetscMax(2*n_R*n_vertices,n);
572   }
573   if (!pcbddc->issym) {
574     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
575     unsymmetric_check = PETSC_TRUE;
576   }
577   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
578 
579   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
580   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
581   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
582   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
583   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
584   if (isLU || isILU || isCHOL) {
585     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
586   } else {
587     F = NULL;
588   }
589 
590   /* Precompute stuffs needed for preprocessing and application of BDDC*/
591   if (n_constraints) {
592     Mat M1,M2,M3;
593     IS  is_aux;
594     /* see if we can save some allocations */
595     if (pcbddc->local_auxmat2) {
596       PetscInt on_R,on_constraints;
597       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
598       if (on_R != n_R || on_constraints != n_constraints) {
599         PetscScalar *marray;
600 
601         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
602         ierr = PetscFree(marray);CHKERRQ(ierr);
603         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
604         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
605       }
606     }
607     /* auxiliary matrices */
608     if (!pcbddc->local_auxmat2) {
609       PetscScalar *marray;
610 
611       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
612       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
613       marray += n_R*n_constraints;
614       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
615     }
616 
617     /* Extract constraints on R nodes: C_{CR}  */
618     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
619     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
620     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
621 
622     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
623     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
624     for (i=0;i<n_constraints;i++) {
625       const PetscScalar *row_cmat_values;
626       const PetscInt    *row_cmat_indices;
627       PetscInt          size_of_constraint,j;
628 
629       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
630       for (j=0;j<size_of_constraint;j++) {
631         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
632       }
633       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
634     }
635     if (F) {
636       Mat B;
637 
638       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
639       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
640       ierr = MatDestroy(&B);CHKERRQ(ierr);
641     } else {
642       PetscScalar *xarray;
643       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
644       for (i=0;i<n_constraints;i++) {
645         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
646         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
647         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
648         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
649         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
650       }
651       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
652     }
653 
654     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
655     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
656     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
657     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
658     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
659     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
660     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
661     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
662     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
663     ierr = MatDestroy(&M2);CHKERRQ(ierr);
664     ierr = MatDestroy(&M3);CHKERRQ(ierr);
665     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
666     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
667     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
668     ierr = MatDestroy(&M1);CHKERRQ(ierr);
669   }
670   /* Get submatrices from subdomain matrix */
671   if (n_vertices) {
672     Mat       newmat;
673     IS        is_aux;
674     PetscInt  ibs,mbs;
675     PetscBool issbaij;
676 
677     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
678     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
679     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
680     if (ibs != mbs) { /* need to convert to SEQAIJ */
681       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
682       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
683       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
684       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
685       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
686     } else {
687       /* this is safe */
688       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
689       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
690       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
691         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
692         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
693         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
694         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
695         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
696       } else {
697         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
698         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
699       }
700     }
701     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
702   }
703 
704   /* Matrix of coarse basis functions (local) */
705   if (pcbddc->coarse_phi_B) {
706     PetscInt on_B,on_primal,on_D=n_D;
707     if (pcbddc->coarse_phi_D) {
708       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
709     }
710     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
711     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
712       PetscScalar *marray;
713 
714       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
715       ierr = PetscFree(marray);CHKERRQ(ierr);
716       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
717       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
718       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
719       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
720     }
721   }
722 
723   if (!pcbddc->coarse_phi_B) {
724     PetscScalar *marray;
725 
726     n = n_B*pcbddc->local_primal_size;
727     if (pcbddc->switch_static || pcbddc->dbg_flag) {
728       n += n_D*pcbddc->local_primal_size;
729     }
730     if (!pcbddc->issym) {
731       n *= 2;
732     }
733     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
734     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
735     n = n_B*pcbddc->local_primal_size;
736     if (pcbddc->switch_static || pcbddc->dbg_flag) {
737       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
738       n += n_D*pcbddc->local_primal_size;
739     }
740     if (!pcbddc->issym) {
741       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
742       if (pcbddc->switch_static || pcbddc->dbg_flag) {
743         n = n_B*pcbddc->local_primal_size;
744         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
745       }
746     } else {
747       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
748       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
749       if (pcbddc->switch_static || pcbddc->dbg_flag) {
750         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
751         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
752       }
753     }
754   }
755   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
756   /* vertices */
757   if (n_vertices) {
758 
759     if (n_R) {
760       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
761       PetscBLASInt B_N,B_one = 1;
762       PetscScalar  *x,*y;
763 
764       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
765       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
766       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
767       if (F) {
768         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
769       } else {
770         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
771         for (i=0;i<n_vertices;i++) {
772           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
773           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
774           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
775           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
776           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
777         }
778         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
779       }
780       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
781       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
782       if (n_constraints) {
783         Mat B;
784         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
785         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
786         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
787         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
788         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
789         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
790         ierr = MatDestroy(&B);CHKERRQ(ierr);
791       }
792       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
793       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
794       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
795       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
796       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
797       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
798       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
799       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
800       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
801       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
802       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
803       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
804     } else {
805       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
806       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
807     }
808     /* coarse basis functions */
809     for (i=0;i<n_vertices;i++) {
810       PetscScalar *y;
811 
812       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
813       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
814       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
815       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
816       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
817       y[n_B*i+idx_V_B[i]] = 1.0;
818       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
819       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
820 
821       if (pcbddc->switch_static || pcbddc->dbg_flag) {
822         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
823         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
824         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
827         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
828       }
829       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
830     }
831     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
832     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
833   }
834 
835   if (n_constraints) {
836     Mat B;
837 
838     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
839     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
840     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
841     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
842     if (n_vertices) {
843       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
844     }
845     ierr = MatDestroy(&B);CHKERRQ(ierr);
846     /* coarse basis functions */
847     for (i=0;i<n_constraints;i++) {
848       PetscScalar *y;
849 
850       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
851       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
852       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
853       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
854       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
855       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
856       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
857 
858       if (pcbddc->switch_static || pcbddc->dbg_flag) {
859         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
860         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
861         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
864         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
865       }
866       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
867     }
868   }
869 
870   /* compute other basis functions for non-symmetric problems */
871   if (!pcbddc->issym) {
872     Mat B,X;
873 
874     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
875 
876     if (n_constraints) {
877       Mat S_CCT,B_C;
878 
879       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
880       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
881       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
882       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
883       if (n_vertices) {
884         Mat B_V,S_VCT;
885 
886         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
887         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
888         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
889         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
890         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
891       }
892       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
893     }
894     if (n_vertices && n_R) {
895       Mat          A_VRT;
896       PetscBLASInt B_N,B_one = 1;
897 
898       if (!n_constraints) { /* if there are no constraints, reset work */
899         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
900       }
901       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
902       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
903       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
904       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
905       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
906     }
907 
908     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
909     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
910       for (i=0;i<pcbddc->local_primal_size;i++) {
911         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
912         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
913         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
914         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
915         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
916       }
917     } else {
918       for (i=0;i<pcbddc->local_primal_size;i++) {
919         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
920         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
921         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
922         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
923         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
924       }
925     }
926     ierr = MatDestroy(&B);CHKERRQ(ierr);
927     /* coarse basis functions */
928     for (i=0;i<pcbddc->local_primal_size;i++) {
929       PetscScalar *y;
930 
931       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
932       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
933       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
934       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
935       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
936       if (i<n_vertices) {
937         y[n_B*i+idx_V_B[i]] = 1.0;
938       }
939       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
940       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
941 
942       if (pcbddc->switch_static || pcbddc->dbg_flag) {
943         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
944         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
945         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
948         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
949       }
950       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
951     }
952     ierr = MatDestroy(&X);CHKERRQ(ierr);
953   }
954 
955   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
956   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
957   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
958   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
959   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
960   /* Checking coarse_sub_mat and coarse basis functios */
961   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
962   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
963   if (pcbddc->dbg_flag) {
964     Mat         coarse_sub_mat;
965     Mat         AUXMAT,TM1,TM2,TM3,TM4;
966     Mat         coarse_phi_D,coarse_phi_B;
967     Mat         coarse_psi_D,coarse_psi_B;
968     Mat         A_II,A_BB,A_IB,A_BI;
969     Mat         C_B,CPHI;
970     IS          is_dummy;
971     Vec         mones;
972     MatType     checkmattype=MATSEQAIJ;
973     PetscReal   real_value;
974 
975     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
976     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
977     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
978     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
979     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
980     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
981     if (unsymmetric_check) {
982       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
983       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
984     }
985     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
986 
987     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
988     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
989     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
990     if (unsymmetric_check) {
991       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
992       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
993       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
994       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
995       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
996       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
997       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
998       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
999       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1000       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1001       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1002       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1003     } else {
1004       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1005       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1006       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1007       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1008       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1009       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1010       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1011       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1012     }
1013     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1014     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1015     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1016     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1017     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1018     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1019     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1020     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1021 
1022     /* check constraints */
1023     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1024     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1025     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1026     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1027     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1028     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1029     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1030     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1031     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1032     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1033     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1034     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1035 
1036     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1037     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1038     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1039     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1040     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1041     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1042     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1043     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1044     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1045     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1046     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1047     if (unsymmetric_check) {
1048       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1049       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1050     }
1051     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1052   }
1053 
1054   /* free memory */
1055   ierr = PetscFree(work);CHKERRQ(ierr);
1056   if (n_vertices) {
1057     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1058   }
1059   if (n_constraints) {
1060     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1061   }
1062   /* get back data */
1063   *coarse_submat_vals_n = coarse_submat_vals;
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1069 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1070 {
1071   Mat            *work_mat;
1072   IS             isrow_s,iscol_s;
1073   PetscBool      rsorted,csorted;
1074   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1075   PetscErrorCode ierr;
1076 
1077   PetscFunctionBegin;
1078   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1079   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1080   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1081   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1082 
1083   if (!rsorted) {
1084     const PetscInt *idxs;
1085     PetscInt *idxs_sorted,i;
1086 
1087     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1088     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1089     for (i=0;i<rsize;i++) {
1090       idxs_perm_r[i] = i;
1091     }
1092     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1093     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1094     for (i=0;i<rsize;i++) {
1095       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1096     }
1097     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1098     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1099   } else {
1100     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1101     isrow_s = isrow;
1102   }
1103 
1104   if (!csorted) {
1105     if (isrow == iscol) {
1106       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1107       iscol_s = isrow_s;
1108     } else {
1109       const PetscInt *idxs;
1110       PetscInt *idxs_sorted,i;
1111 
1112       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1113       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1114       for (i=0;i<csize;i++) {
1115         idxs_perm_c[i] = i;
1116       }
1117       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1118       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1119       for (i=0;i<csize;i++) {
1120         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1121       }
1122       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1123       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1124     }
1125   } else {
1126     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1127     iscol_s = iscol;
1128   }
1129 
1130   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1131 
1132   if (!rsorted || !csorted) {
1133     Mat      new_mat;
1134     IS       is_perm_r,is_perm_c;
1135 
1136     if (!rsorted) {
1137       PetscInt *idxs_r,i;
1138       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1139       for (i=0;i<rsize;i++) {
1140         idxs_r[idxs_perm_r[i]] = i;
1141       }
1142       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1143       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1144     } else {
1145       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1146     }
1147     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1148 
1149     if (!csorted) {
1150       if (isrow_s == iscol_s) {
1151         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1152         is_perm_c = is_perm_r;
1153       } else {
1154         PetscInt *idxs_c,i;
1155         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1156         for (i=0;i<csize;i++) {
1157           idxs_c[idxs_perm_c[i]] = i;
1158         }
1159         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1160         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1161       }
1162     } else {
1163       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1164     }
1165     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1166 
1167     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1168     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1169     work_mat[0] = new_mat;
1170     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1171     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1172   }
1173 
1174   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1175   *B = work_mat[0];
1176   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1177   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1178   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1184 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1185 {
1186   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1187   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1188   Mat            new_mat;
1189   IS             is_local,is_global;
1190   PetscInt       local_size;
1191   PetscBool      isseqaij;
1192   PetscErrorCode ierr;
1193 
1194   PetscFunctionBegin;
1195   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1196   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1197   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1198   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1199   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1200   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1201   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1202 
1203   /* check */
1204   if (pcbddc->dbg_flag) {
1205     Vec       x,x_change;
1206     PetscReal error;
1207 
1208     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1209     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1210     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1211     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1212     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1213     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1214     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1216     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1217     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1218     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1219     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1220     ierr = VecDestroy(&x);CHKERRQ(ierr);
1221     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1222   }
1223 
1224   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1225   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1226   if (isseqaij) {
1227     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1228   } else {
1229     Mat work_mat;
1230     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1231     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1232     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1233   }
1234   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1235 #if !defined(PETSC_USE_COMPLEX)
1236   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1237 #endif
1238   /*
1239   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1240   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1241   */
1242   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1248 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1249 {
1250   PC_IS*         pcis = (PC_IS*)(pc->data);
1251   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1252   IS             is_aux1,is_aux2;
1253   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1254   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1255   PetscInt       vbs,bs;
1256   PetscBT        bitmask;
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   /*
1261     No need to setup local scatters if
1262       - primal space is unchanged
1263         AND
1264       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1265         AND
1266       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1267   */
1268   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1269     PetscFunctionReturn(0);
1270   }
1271   /* destroy old objects */
1272   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1273   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1274   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1275   /* Set Non-overlapping dimensions */
1276   n_B = pcis->n_B; n_D = pcis->n - n_B;
1277   n_vertices = pcbddc->n_actual_vertices;
1278   /* create auxiliary bitmask */
1279   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1280   for (i=0;i<n_vertices;i++) {
1281     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1282   }
1283 
1284   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1285   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1286   for (i=0, n_R=0; i<pcis->n; i++) {
1287     if (!PetscBTLookup(bitmask,i)) {
1288       idx_R_local[n_R] = i;
1289       n_R++;
1290     }
1291   }
1292 
1293   /* Block code */
1294   vbs = 1;
1295   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1296   if (bs>1 && !(n_vertices%bs)) {
1297     PetscBool is_blocked = PETSC_TRUE;
1298     PetscInt  *vary;
1299     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1300     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1301     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1302     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1303     for (i=0; i<n_vertices/bs; i++) {
1304       if (vary[i]!=0 && vary[i]!=bs) {
1305         is_blocked = PETSC_FALSE;
1306         break;
1307       }
1308     }
1309     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1310       vbs = bs;
1311       for (i=0;i<n_R/vbs;i++) {
1312         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1313       }
1314     }
1315     ierr = PetscFree(vary);CHKERRQ(ierr);
1316   }
1317   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1318   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1319 
1320   /* print some info if requested */
1321   if (pcbddc->dbg_flag) {
1322     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1323     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1324     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1325     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1326     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1327     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1328     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1329     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1330   }
1331 
1332   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1333   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1334   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1335   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1336   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1337   for (i=0; i<n_D; i++) {
1338     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1339   }
1340   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1341   for (i=0, j=0; i<n_R; i++) {
1342     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1343       aux_array1[j++] = i;
1344     }
1345   }
1346   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1347   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1348   for (i=0, j=0; i<n_B; i++) {
1349     if (!PetscBTLookup(bitmask,is_indices[i])) {
1350       aux_array2[j++] = i;
1351     }
1352   }
1353   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1354   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1355   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1356   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1357   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1358 
1359   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1360     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1361     for (i=0, j=0; i<n_R; i++) {
1362       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1363         aux_array1[j++] = i;
1364       }
1365     }
1366     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1367     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1368     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1369   }
1370   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1371   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1378 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1379 {
1380   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1381   PC_IS          *pcis = (PC_IS*)pc->data;
1382   PC             pc_temp;
1383   Mat            A_RR;
1384   MatReuse       reuse;
1385   PetscScalar    m_one = -1.0;
1386   PetscReal      value;
1387   PetscInt       n_D,n_R,ibs,mbs;
1388   PetscBool      use_exact,use_exact_reduced,issbaij;
1389   PetscErrorCode ierr;
1390   /* prefixes stuff */
1391   char           dir_prefix[256],neu_prefix[256],str_level[16];
1392   size_t         len;
1393 
1394   PetscFunctionBegin;
1395 
1396   /* compute prefixes */
1397   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1398   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1399   if (!pcbddc->current_level) {
1400     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1401     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1402     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1403     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1406     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1407     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1408     len -= 15; /* remove "pc_bddc_coarse_" */
1409     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1410     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1411     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1412     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1413     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1414     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1415     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1416     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1417   }
1418 
1419   /* DIRICHLET PROBLEM */
1420   if (dirichlet) {
1421     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1422     if (pcbddc->issym) {
1423       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1424     }
1425     /* Matrix for Dirichlet problem is pcis->A_II */
1426     n_D = pcis->n - pcis->n_B;
1427     if (!pcbddc->ksp_D) { /* create object if not yet build */
1428       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1429       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1430       /* default */
1431       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1432       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1433       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1434       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1435       if (issbaij) {
1436         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1437       } else {
1438         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1439       }
1440       /* Allow user's customization */
1441       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1442       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1443     }
1444     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1445     if (sub_schurs->interior_solver) {
1446       ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr);
1447     }
1448     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1449     if (!n_D) {
1450       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1451       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1452     }
1453     /* Set Up KSP for Dirichlet problem of BDDC */
1454     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1455     /* set ksp_D into pcis data */
1456     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1457     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1458     pcis->ksp_D = pcbddc->ksp_D;
1459   }
1460 
1461   /* NEUMANN PROBLEM */
1462   A_RR = 0;
1463   if (neumann) {
1464     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1465     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1466     if (pcbddc->ksp_R) { /* already created ksp */
1467       PetscInt nn_R;
1468       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1469       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1470       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1471       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1472         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1473         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1474         reuse = MAT_INITIAL_MATRIX;
1475       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1476         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1477           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1478           reuse = MAT_INITIAL_MATRIX;
1479         } else { /* safe to reuse the matrix */
1480           reuse = MAT_REUSE_MATRIX;
1481         }
1482       }
1483       /* last check */
1484       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1485         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1486         reuse = MAT_INITIAL_MATRIX;
1487       }
1488     } else { /* first time, so we need to create the matrix */
1489       reuse = MAT_INITIAL_MATRIX;
1490     }
1491     /* extract A_RR */
1492     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1493     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1494     if (ibs != mbs) {
1495       Mat newmat;
1496       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1497       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1498       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1501     }
1502     if (pcbddc->issym) {
1503       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1504     }
1505     if (!pcbddc->ksp_R) { /* create object if not present */
1506       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1507       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1508       /* default */
1509       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1510       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1511       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1512       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1513       if (issbaij) {
1514         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1515       } else {
1516         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1517       }
1518       /* Allow user's customization */
1519       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1520       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1521     }
1522     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1523     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1524     if (!n_R) {
1525       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1526       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1527     }
1528     /* Set Up KSP for Neumann problem of BDDC */
1529     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1530   }
1531 
1532   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1533   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1534     if (pcbddc->dbg_flag) {
1535       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1536       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1537       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1538     }
1539     if (dirichlet) { /* Dirichlet */
1540       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1541       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1542       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1543       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1544       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1545       /* need to be adapted? */
1546       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1547       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1548       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1549       /* print info */
1550       if (pcbddc->dbg_flag) {
1551         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1552         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1553       }
1554       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1555         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1556       }
1557     }
1558     if (neumann) { /* Neumann */
1559       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1560       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1561       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1562       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1563       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1564       /* need to be adapted? */
1565       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1566       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1567       /* print info */
1568       if (pcbddc->dbg_flag) {
1569         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1570         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1571       }
1572       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1573         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1574       }
1575     }
1576   }
1577   /* free Neumann problem's matrix */
1578   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1584 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1585 {
1586   PetscErrorCode ierr;
1587   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1588 
1589   PetscFunctionBegin;
1590   if (applytranspose) {
1591     if (pcbddc->local_auxmat1) {
1592       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1593       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1594     }
1595     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1596   } else {
1597     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1598     if (pcbddc->local_auxmat1) {
1599       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1600       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1601     }
1602   }
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1607 #undef __FUNCT__
1608 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1609 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1610 {
1611   PetscErrorCode ierr;
1612   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1613   PC_IS*            pcis = (PC_IS*)  (pc->data);
1614   const PetscScalar zero = 0.0;
1615 
1616   PetscFunctionBegin;
1617   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1618   if (applytranspose) {
1619     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1620     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1621   } else {
1622     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1623     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1624   }
1625   /* start communications from local primal nodes to rhs of coarse solver */
1626   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1627   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1628   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629 
1630   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1631   /* TODO remove null space when doing multilevel */
1632   if (pcbddc->coarse_ksp) {
1633     Vec rhs,sol;
1634 
1635     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1636     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1637     if (applytranspose) {
1638       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1639     } else {
1640       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1641     }
1642   }
1643 
1644   /* Local solution on R nodes */
1645   if (pcis->n) {
1646     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1647     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1648     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1649     if (pcbddc->switch_static) {
1650       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1651       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1652     }
1653     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1654     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1655     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1656     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657     if (pcbddc->switch_static) {
1658       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1659       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     }
1661   }
1662 
1663   /* communications from coarse sol to local primal nodes */
1664   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1665   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1666 
1667   /* Sum contributions from two levels */
1668   if (applytranspose) {
1669     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1670     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1671   } else {
1672     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1673     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1674   }
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 #undef __FUNCT__
1679 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1680 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1681 {
1682   PetscErrorCode ierr;
1683   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1684   PetscScalar    *array;
1685   Vec            from,to;
1686 
1687   PetscFunctionBegin;
1688   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1689     from = pcbddc->coarse_vec;
1690     to = pcbddc->vec1_P;
1691     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1692       Vec tvec;
1693 
1694       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1695       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1696       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1697       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1698       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1699       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1700     }
1701   } else { /* from local to global -> put data in coarse right hand side */
1702     from = pcbddc->vec1_P;
1703     to = pcbddc->coarse_vec;
1704   }
1705   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1711 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1712 {
1713   PetscErrorCode ierr;
1714   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1715   PetscScalar    *array;
1716   Vec            from,to;
1717 
1718   PetscFunctionBegin;
1719   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1720     from = pcbddc->coarse_vec;
1721     to = pcbddc->vec1_P;
1722   } else { /* from local to global -> put data in coarse right hand side */
1723     from = pcbddc->vec1_P;
1724     to = pcbddc->coarse_vec;
1725   }
1726   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1727   if (smode == SCATTER_FORWARD) {
1728     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1729       Vec tvec;
1730 
1731       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1732       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1733       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1734       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1735     }
1736   } else {
1737     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1738      ierr = VecResetArray(from);CHKERRQ(ierr);
1739     }
1740   }
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 /* uncomment for testing purposes */
1745 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1746 #undef __FUNCT__
1747 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1748 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1749 {
1750   PetscErrorCode    ierr;
1751   PC_IS*            pcis = (PC_IS*)(pc->data);
1752   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1753   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1754   /* one and zero */
1755   PetscScalar       one=1.0,zero=0.0;
1756   /* space to store constraints and their local indices */
1757   PetscScalar       *temp_quadrature_constraint;
1758   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1759   /* iterators */
1760   PetscInt          i,j,k,total_counts,temp_start_ptr;
1761   /* BLAS integers */
1762   PetscBLASInt      lwork,lierr;
1763   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1764   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1765   /* reuse */
1766   PetscInt          olocal_primal_size;
1767   PetscInt          *oprimal_indices_local_idxs;
1768   /* change of basis */
1769   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1770   PetscBool         boolforchange,qr_needed;
1771   PetscBT           touched,change_basis,qr_needed_idx;
1772   /* auxiliary stuff */
1773   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1774   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1775   PetscScalar       *temp_quadrature_constraint_work=NULL;
1776   /* some quantities */
1777   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1778   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1779 
1780   PetscFunctionBegin;
1781   /* Destroy Mat objects computed previously */
1782   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1783   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1784 
1785   /* print some info */
1786   if (pcbddc->dbg_flag) {
1787     IS       vertices;
1788     PetscInt nv,nedges,nfaces;
1789     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1790     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1791     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1792     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1793     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1794     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1795     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1796     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1797     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1798   }
1799 
1800   if (!pcbddc->adaptive_selection) {
1801     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1802     MatNullSpace nearnullsp;
1803     const Vec    *nearnullvecs;
1804     Vec          *localnearnullsp;
1805     PetscScalar  *array;
1806     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1807     PetscBool    nnsp_has_cnst;
1808     /* LAPACK working arrays for SVD or POD */
1809     PetscBool    skip_lapack;
1810     PetscScalar  *work;
1811     PetscReal    *singular_vals;
1812 #if defined(PETSC_USE_COMPLEX)
1813     PetscReal    *rwork;
1814 #endif
1815 #if defined(PETSC_MISSING_LAPACK_GESVD)
1816     PetscScalar  *temp_basis,*correlation_mat;
1817 #else
1818     PetscBLASInt dummy_int=1;
1819     PetscScalar  dummy_scalar=1.;
1820 #endif
1821 
1822     /* Get index sets for faces, edges and vertices from graph */
1823     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1824     /* free unneeded index sets */
1825     if (!pcbddc->use_vertices) {
1826       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1827     }
1828     if (!pcbddc->use_edges) {
1829       for (i=0;i<n_ISForEdges;i++) {
1830         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1831       }
1832       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1833       n_ISForEdges = 0;
1834     }
1835     if (!pcbddc->use_faces) {
1836       for (i=0;i<n_ISForFaces;i++) {
1837         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1838       }
1839       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1840       n_ISForFaces = 0;
1841     }
1842     /* HACKS (the following two blocks of code) */
1843     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1844       pcbddc->use_change_of_basis = PETSC_TRUE;
1845       if (!ISForEdges) {
1846         pcbddc->use_change_on_faces = PETSC_TRUE;
1847       }
1848     }
1849     if (pcbddc->NullSpace) {
1850       /* use_change_of_basis should be consistent among processors */
1851       PetscBool tbool[2],gbool[2];
1852       tbool[0] = pcbddc->use_change_of_basis;
1853       tbool[1] = pcbddc->use_change_on_faces;
1854       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1855       pcbddc->use_change_of_basis = gbool[0];
1856       pcbddc->use_change_on_faces = gbool[1];
1857     }
1858 
1859     /* check if near null space is attached to global mat */
1860     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1861     if (nearnullsp) {
1862       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1863       /* remove any stored info */
1864       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1865       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1866       /* store information for BDDC solver reuse */
1867       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1868       pcbddc->onearnullspace = nearnullsp;
1869       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1870       for (i=0;i<nnsp_size;i++) {
1871         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1872       }
1873     } else { /* if near null space is not provided BDDC uses constants by default */
1874       nnsp_size = 0;
1875       nnsp_has_cnst = PETSC_TRUE;
1876     }
1877     /* get max number of constraints on a single cc */
1878     max_constraints = nnsp_size;
1879     if (nnsp_has_cnst) max_constraints++;
1880 
1881     /*
1882          Evaluate maximum storage size needed by the procedure
1883          - temp_indices will contain start index of each constraint stored as follows
1884          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1885          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1886          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1887                                                                                                                                                            */
1888     total_counts = n_ISForFaces+n_ISForEdges;
1889     total_counts *= max_constraints;
1890     n_vertices = 0;
1891     if (ISForVertices) {
1892       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1893     }
1894     total_counts += n_vertices;
1895     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1896     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1897     total_counts = 0;
1898     max_size_of_constraint = 0;
1899     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1900       if (i<n_ISForEdges) {
1901         used_IS = &ISForEdges[i];
1902       } else {
1903         used_IS = &ISForFaces[i-n_ISForEdges];
1904       }
1905       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1906       total_counts += j;
1907       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1908     }
1909     total_counts *= max_constraints;
1910     total_counts += n_vertices;
1911     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1912     /* get local part of global near null space vectors */
1913     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1914     for (k=0;k<nnsp_size;k++) {
1915       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1916       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1917       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1918     }
1919 
1920     /* whether or not to skip lapack calls */
1921     skip_lapack = PETSC_TRUE;
1922     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1923 
1924     /* allocate some auxiliary stuff */
1925     if (!skip_lapack || pcbddc->use_qr_single) {
1926       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1927     } else {
1928       gidxs = NULL;
1929       permutation = NULL;
1930       temp_indices_to_constraint_work = NULL;
1931       temp_quadrature_constraint_work = NULL;
1932     }
1933 
1934     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1935     if (!skip_lapack) {
1936       PetscScalar temp_work;
1937 
1938 #if defined(PETSC_MISSING_LAPACK_GESVD)
1939       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1940       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1941       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1942       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1943 #if defined(PETSC_USE_COMPLEX)
1944       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1945 #endif
1946       /* now we evaluate the optimal workspace using query with lwork=-1 */
1947       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1948       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1949       lwork = -1;
1950       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1951 #if !defined(PETSC_USE_COMPLEX)
1952       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1953 #else
1954       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1955 #endif
1956       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1957       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1958 #else /* on missing GESVD */
1959       /* SVD */
1960       PetscInt max_n,min_n;
1961       max_n = max_size_of_constraint;
1962       min_n = max_constraints;
1963       if (max_size_of_constraint < max_constraints) {
1964         min_n = max_size_of_constraint;
1965         max_n = max_constraints;
1966       }
1967       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1968 #if defined(PETSC_USE_COMPLEX)
1969       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1970 #endif
1971       /* now we evaluate the optimal workspace using query with lwork=-1 */
1972       lwork = -1;
1973       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1974       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1975       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1976       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1977 #if !defined(PETSC_USE_COMPLEX)
1978       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1979 #else
1980       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1981 #endif
1982       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1983       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1984 #endif /* on missing GESVD */
1985       /* Allocate optimal workspace */
1986       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1987       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1988     }
1989     /* Now we can loop on constraining sets */
1990     total_counts = 0;
1991     temp_indices[0] = 0;
1992     /* vertices */
1993     if (ISForVertices) {
1994       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1995       if (nnsp_has_cnst) { /* consider all vertices */
1996         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1997         for (i=0;i<n_vertices;i++) {
1998           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1999           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2000           total_counts++;
2001         }
2002       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2003         PetscBool used_vertex;
2004         for (i=0;i<n_vertices;i++) {
2005           used_vertex = PETSC_FALSE;
2006           k = 0;
2007           while (!used_vertex && k<nnsp_size) {
2008             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2009             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2010               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
2011               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2012               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2013               total_counts++;
2014               used_vertex = PETSC_TRUE;
2015             }
2016             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2017             k++;
2018           }
2019         }
2020       }
2021       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2022       n_vertices = total_counts;
2023     }
2024 
2025     /* edges and faces */
2026     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2027       if (ncc<n_ISForEdges) {
2028         used_IS = &ISForEdges[ncc];
2029         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2030       } else {
2031         used_IS = &ISForFaces[ncc-n_ISForEdges];
2032         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2033       }
2034       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2035       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2036       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2037       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2038       /* change of basis should not be performed on local periodic nodes */
2039       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2040       if (nnsp_has_cnst) {
2041         PetscScalar quad_value;
2042         temp_constraints++;
2043         if (!pcbddc->use_nnsp_true) {
2044           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2045         } else {
2046           quad_value = 1.0;
2047         }
2048         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2049         for (j=0;j<size_of_constraint;j++) {
2050           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2051         }
2052         /* sort by global ordering if using lapack subroutines (not needed!) */
2053         if (!skip_lapack || pcbddc->use_qr_single) {
2054           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2055           for (j=0;j<size_of_constraint;j++) {
2056             permutation[j]=j;
2057           }
2058           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2059           for (j=0;j<size_of_constraint;j++) {
2060             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2061           }
2062           for (j=0;j<size_of_constraint;j++) {
2063             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2064             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2065           }
2066           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2067           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2068         }
2069         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2070         total_counts++;
2071       }
2072       for (k=0;k<nnsp_size;k++) {
2073         PetscReal real_value;
2074         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2075         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2076         for (j=0;j<size_of_constraint;j++) {
2077           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2078         }
2079         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2080         /* check if array is null on the connected component */
2081         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2082         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2083         if (real_value > 0.0) { /* keep indices and values */
2084           /* sort by global ordering if using lapack subroutines */
2085           if (!skip_lapack || pcbddc->use_qr_single) {
2086             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2087             for (j=0;j<size_of_constraint;j++) {
2088               permutation[j]=j;
2089             }
2090             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2091             for (j=0;j<size_of_constraint;j++) {
2092               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2093               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2094             }
2095             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2096             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2097           }
2098           temp_constraints++;
2099           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2100           total_counts++;
2101         }
2102       }
2103       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2104       valid_constraints = temp_constraints;
2105       if (!pcbddc->use_nnsp_true && temp_constraints) {
2106         if (temp_constraints == 1) { /* just normalize the constraint */
2107           PetscScalar norm;
2108           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2109           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2110           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2111           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2112         } else { /* perform SVD */
2113           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2114 
2115 #if defined(PETSC_MISSING_LAPACK_GESVD)
2116           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2117              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2118              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2119                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2120                 from that computed using LAPACKgesvd
2121              -> This is due to a different computation of eigenvectors in LAPACKheev
2122              -> The quality of the POD-computed basis will be the same */
2123           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2124           /* Store upper triangular part of correlation matrix */
2125           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2126           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2127           for (j=0;j<temp_constraints;j++) {
2128             for (k=0;k<j+1;k++) {
2129               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2130             }
2131           }
2132           /* compute eigenvalues and eigenvectors of correlation matrix */
2133           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2134           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2135 #if !defined(PETSC_USE_COMPLEX)
2136           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2137 #else
2138           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2139 #endif
2140           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2141           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2142           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2143           j = 0;
2144           while (j < temp_constraints && singular_vals[j] < tol) j++;
2145           total_counts = total_counts-j;
2146           valid_constraints = temp_constraints-j;
2147           /* scale and copy POD basis into used quadrature memory */
2148           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2149           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2150           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2151           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2152           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2153           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2154           if (j<temp_constraints) {
2155             PetscInt ii;
2156             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2157             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2158             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2159             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2160             for (k=0;k<temp_constraints-j;k++) {
2161               for (ii=0;ii<size_of_constraint;ii++) {
2162                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2163               }
2164             }
2165           }
2166 #else  /* on missing GESVD */
2167           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2168           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2169           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2170           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2171 #if !defined(PETSC_USE_COMPLEX)
2172           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2173 #else
2174           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2175 #endif
2176           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2177           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2178           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2179           k = temp_constraints;
2180           if (k > size_of_constraint) k = size_of_constraint;
2181           j = 0;
2182           while (j < k && singular_vals[k-j-1] < tol) j++;
2183           valid_constraints = k-j;
2184           total_counts = total_counts-temp_constraints+valid_constraints;
2185 #endif /* on missing GESVD */
2186         }
2187       }
2188       /* setting change_of_basis flag is safe now */
2189       if (boolforchange) {
2190         for (j=0;j<valid_constraints;j++) {
2191           PetscBTSet(change_basis,total_counts-j-1);
2192         }
2193       }
2194     }
2195     /* free workspace */
2196     if (!skip_lapack || pcbddc->use_qr_single) {
2197       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2198     }
2199     if (!skip_lapack) {
2200       ierr = PetscFree(work);CHKERRQ(ierr);
2201 #if defined(PETSC_USE_COMPLEX)
2202       ierr = PetscFree(rwork);CHKERRQ(ierr);
2203 #endif
2204       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2205 #if defined(PETSC_MISSING_LAPACK_GESVD)
2206       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2207       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2208 #endif
2209     }
2210     for (k=0;k<nnsp_size;k++) {
2211       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2212     }
2213     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2214     /* free index sets of faces, edges and vertices */
2215     for (i=0;i<n_ISForFaces;i++) {
2216       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2217     }
2218     if (n_ISForFaces) {
2219       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2220     }
2221     for (i=0;i<n_ISForEdges;i++) {
2222       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2223     }
2224     if (n_ISForEdges) {
2225       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2226     }
2227     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2228   } else {
2229     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2230     PetscInt        cum = 0;
2231 
2232     total_counts = 0;
2233     n_vertices = 0;
2234     if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
2235       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2236     }
2237     max_constraints = 0;
2238     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2239       total_counts += pcbddc->adaptive_constraints_n[i];
2240       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2241     }
2242     temp_indices = pcbddc->adaptive_constraints_ptrs;
2243     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2244     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2245 
2246 #if 0
2247     printf("Found %d totals\n",total_counts);
2248     for (i=0;i<total_counts;i++) {
2249       printf("const %d, start %d",i,temp_indices[i]);
2250       printf(" end %d:\n",temp_indices[i+1]);
2251       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2252         printf("  idxs %d",temp_indices_to_constraint[j]);
2253         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2254       }
2255     }
2256     for (i=0;i<n_vertices;i++) {
2257       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2258     }
2259     for (i=0;i<sub_schurs->n_subs;i++) {
2260       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2261     }
2262 #endif
2263 
2264     max_size_of_constraint = 0;
2265     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2266     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2267     /* Change of basis */
2268     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2269     if (pcbddc->use_change_of_basis) {
2270       cum = n_vertices;
2271       for (i=0;i<sub_schurs->n_subs;i++) {
2272         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2273           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2274             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2275           }
2276         }
2277         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2278       }
2279     }
2280   }
2281 
2282   /* map temp_indices_to_constraint in boundary numbering */
2283   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2284   if (i != temp_indices[total_counts]) {
2285     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2286   }
2287 
2288   /* set quantities in pcbddc data structure and store previous primal size */
2289   /* n_vertices defines the number of subdomain corners in the primal space */
2290   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2291   olocal_primal_size = pcbddc->local_primal_size;
2292   pcbddc->local_primal_size = total_counts;
2293   pcbddc->n_vertices = n_vertices;
2294   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2295 
2296   /* Create constraint matrix */
2297   /* The constraint matrix is used to compute the l2g map of primal dofs */
2298   /* so we need to set it up properly either with or without change of basis */
2299   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2300   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2301   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2302   /* array to compute a local numbering of constraints : vertices first then constraints */
2303   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2304   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2305   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2306   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2307   /* auxiliary stuff for basis change */
2308   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2309   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2310 
2311   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2312   total_primal_vertices=0;
2313   for (i=0;i<pcbddc->local_primal_size;i++) {
2314     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2315     if (size_of_constraint == 1) {
2316       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2317       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2318       aux_primal_minloc[total_primal_vertices]=0;
2319       total_primal_vertices++;
2320     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2321       PetscInt min_loc,min_index;
2322       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2323       /* find first untouched local node */
2324       k = 0;
2325       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2326       min_index = global_indices[k];
2327       min_loc = k;
2328       /* search the minimum among global nodes already untouched on the cc */
2329       for (k=1;k<size_of_constraint;k++) {
2330         /* there can be more than one constraint on a single connected component */
2331         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2332           min_index = global_indices[k];
2333           min_loc = k;
2334         }
2335       }
2336       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2337       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2338       aux_primal_minloc[total_primal_vertices]=min_loc;
2339       total_primal_vertices++;
2340     }
2341   }
2342   /* determine if a QR strategy is needed for change of basis */
2343   qr_needed = PETSC_FALSE;
2344   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2345   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2346     if (PetscBTLookup(change_basis,i)) {
2347       if (!pcbddc->use_qr_single && !pcbddc->faster_deluxe) {
2348         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2349         j = 0;
2350         for (k=0;k<size_of_constraint;k++) {
2351           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2352             j++;
2353           }
2354         }
2355         /* found more than one primal dof on the cc */
2356         if (j > 1) {
2357           PetscBTSet(qr_needed_idx,i);
2358           qr_needed = PETSC_TRUE;
2359         }
2360       } else {
2361         PetscBTSet(qr_needed_idx,i);
2362         qr_needed = PETSC_TRUE;
2363       }
2364     }
2365   }
2366   /* free workspace */
2367   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2368 
2369   /* permute indices in order to have a sorted set of vertices */
2370   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2371 
2372   /* nonzero structure of constraint matrix */
2373   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2374   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2375   j=total_primal_vertices;
2376   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2377     if (!PetscBTLookup(change_basis,i)) {
2378       nnz[j]=temp_indices[i+1]-temp_indices[i];
2379       j++;
2380     }
2381   }
2382   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2383   ierr = PetscFree(nnz);CHKERRQ(ierr);
2384   /* set values in constraint matrix */
2385   for (i=0;i<total_primal_vertices;i++) {
2386     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2387   }
2388   total_counts = total_primal_vertices;
2389   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2390     if (!PetscBTLookup(change_basis,i)) {
2391       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2392       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2393       total_counts++;
2394     }
2395   }
2396   /* assembling */
2397   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2398   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2399   /*
2400   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2401   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2402   */
2403   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2404   if (pcbddc->use_change_of_basis) {
2405     /* dual and primal dofs on a single cc */
2406     PetscInt     dual_dofs,primal_dofs;
2407     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2408     PetscInt     primal_counter;
2409     /* working stuff for GEQRF */
2410     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2411     PetscBLASInt lqr_work;
2412     /* working stuff for UNGQR */
2413     PetscScalar  *gqr_work,lgqr_work_t;
2414     PetscBLASInt lgqr_work;
2415     /* working stuff for TRTRS */
2416     PetscScalar  *trs_rhs;
2417     PetscBLASInt Blas_NRHS;
2418     /* pointers for values insertion into change of basis matrix */
2419     PetscInt     *start_rows,*start_cols;
2420     PetscScalar  *start_vals;
2421     /* working stuff for values insertion */
2422     PetscBT      is_primal;
2423     /* matrix sizes */
2424     PetscInt     global_size,local_size;
2425     /* temporary change of basis */
2426     Mat          localChangeOfBasisMatrix;
2427     /* extra space for debugging */
2428     PetscScalar  *dbg_work;
2429 
2430     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2431     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2432     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2433     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2434     /* nonzeros for local mat */
2435     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2436     for (i=0;i<pcis->n;i++) nnz[i]=1;
2437     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2438       if (PetscBTLookup(change_basis,i)) {
2439         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2440         if (PetscBTLookup(qr_needed_idx,i)) {
2441           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2442         } else {
2443           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2444           /* get local primal index on the cc */
2445           j = 0;
2446           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2447           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2448         }
2449       }
2450     }
2451     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2452     ierr = PetscFree(nnz);CHKERRQ(ierr);
2453     /* Set initial identity in the matrix */
2454     for (i=0;i<pcis->n;i++) {
2455       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2456     }
2457 
2458     if (pcbddc->dbg_flag) {
2459       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2460       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2461     }
2462 
2463 
2464     /* Now we loop on the constraints which need a change of basis */
2465     /*
2466        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2467        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2468 
2469        Basic blocks of change of basis matrix T computed by
2470 
2471           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2472 
2473             | 1        0   ...        0         s_1/S |
2474             | 0        1   ...        0         s_2/S |
2475             |              ...                        |
2476             | 0        ...            1     s_{n-1}/S |
2477             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2478 
2479             with S = \sum_{i=1}^n s_i^2
2480             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2481                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2482 
2483           - QR decomposition of constraints otherwise
2484     */
2485     if (qr_needed) {
2486       /* space to store Q */
2487       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2488       /* first we issue queries for optimal work */
2489       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2490       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2491       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2492       lqr_work = -1;
2493       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2494       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2495       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2496       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2497       lgqr_work = -1;
2498       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2499       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2500       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2501       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2502       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2503       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2504       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2505       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2506       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2507       /* array to store scaling factors for reflectors */
2508       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2509       /* array to store rhs and solution of triangular solver */
2510       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2511       /* allocating workspace for check */
2512       if (pcbddc->dbg_flag) {
2513         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2514       }
2515     }
2516     /* array to store whether a node is primal or not */
2517     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2518     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2519     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2520     if (i != total_primal_vertices) {
2521       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2522     }
2523     for (i=0;i<total_primal_vertices;i++) {
2524       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2525     }
2526     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2527 
2528     /* loop on constraints and see whether or not they need a change of basis and compute it */
2529     /* -> using implicit ordering contained in temp_indices data */
2530     total_counts = pcbddc->n_vertices;
2531     primal_counter = total_counts;
2532     while (total_counts<pcbddc->local_primal_size) {
2533       primal_dofs = 1;
2534       if (PetscBTLookup(change_basis,total_counts)) {
2535         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2536         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2537           primal_dofs++;
2538         }
2539         /* get constraint info */
2540         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2541         dual_dofs = size_of_constraint-primal_dofs;
2542 
2543         if (pcbddc->dbg_flag) {
2544           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2545         }
2546 
2547         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2548 
2549           /* copy quadrature constraints for change of basis check */
2550           if (pcbddc->dbg_flag) {
2551             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2552           }
2553           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2554           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2555 
2556           /* compute QR decomposition of constraints */
2557           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2558           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2559           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2560           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2561           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2562           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2563           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2564 
2565           /* explictly compute R^-T */
2566           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2567           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2568           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2569           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2570           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2571           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2572           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2573           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2574           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2575           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2576 
2577           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2578           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2579           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2580           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2581           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2582           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2583           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2584           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2585           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2586 
2587           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2588              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2589              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2590           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2591           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2592           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2593           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2594           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2595           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2596           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2597           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2598           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2599           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2600 
2601           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2602           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2603           /* insert cols for primal dofs */
2604           for (j=0;j<primal_dofs;j++) {
2605             start_vals = &qr_basis[j*size_of_constraint];
2606             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2607             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2608           }
2609           /* insert cols for dual dofs */
2610           for (j=0,k=0;j<dual_dofs;k++) {
2611             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2612               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2613               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2614               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2615               j++;
2616             }
2617           }
2618 
2619           /* check change of basis */
2620           if (pcbddc->dbg_flag) {
2621             PetscInt   ii,jj;
2622             PetscBool valid_qr=PETSC_TRUE;
2623             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2624             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2625             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2626             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2627             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2628             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2629             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2630             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2631             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2632             for (jj=0;jj<size_of_constraint;jj++) {
2633               for (ii=0;ii<primal_dofs;ii++) {
2634                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2635                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2636               }
2637             }
2638             if (!valid_qr) {
2639               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2640               for (jj=0;jj<size_of_constraint;jj++) {
2641                 for (ii=0;ii<primal_dofs;ii++) {
2642                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2643                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2644                   }
2645                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2646                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2647                   }
2648                 }
2649               }
2650             } else {
2651               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2652             }
2653           }
2654         } else { /* simple transformation block */
2655           PetscInt    row,col;
2656           PetscScalar val,norm;
2657 
2658           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2659           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2660           for (j=0;j<size_of_constraint;j++) {
2661             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2662             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2663             if (!PetscBTLookup(is_primal,row_B)) {
2664               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2665               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2666               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2667             } else {
2668               for (k=0;k<size_of_constraint;k++) {
2669                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2670                 if (row != col) {
2671                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2672                 } else {
2673                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2674                 }
2675                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2676               }
2677             }
2678           }
2679           if (pcbddc->dbg_flag) {
2680             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2681           }
2682         }
2683         /* increment primal counter */
2684         primal_counter += primal_dofs;
2685       } else {
2686         if (pcbddc->dbg_flag) {
2687           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2688         }
2689       }
2690       /* increment constraint counter total_counts */
2691       total_counts += primal_dofs;
2692     }
2693 
2694     /* free workspace */
2695     if (qr_needed) {
2696       if (pcbddc->dbg_flag) {
2697         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2698       }
2699       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2700       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2701       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2702       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2703       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2704     }
2705     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2706     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2707     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2708 
2709     /* assembling of global change of variable */
2710     {
2711       Mat      tmat;
2712       PetscInt bs;
2713 
2714       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2715       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2716       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2717       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2718       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2719       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2720       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2721       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2722       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2723       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2724       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2725       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2726       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2727       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2728       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2729       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2730       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2731       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2732     }
2733     /* check */
2734     if (pcbddc->dbg_flag) {
2735       PetscReal error;
2736       Vec       x,x_change;
2737 
2738       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2739       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2740       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2741       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2742       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2743       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2744       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2745       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2746       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2747       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2748       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2749       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2750       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2751       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2752       ierr = VecDestroy(&x);CHKERRQ(ierr);
2753       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2754     }
2755 
2756     /* adapt sub_schurs computed (if any) */
2757     if (pcbddc->use_deluxe_scaling) {
2758       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2759       if (sub_schurs->S_Ej_all) {
2760         Mat S_new,tmat;
2761         IS is_all_N;
2762 
2763         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2764         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2765         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2766         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2767         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2768         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2769         sub_schurs->S_Ej_all = S_new;
2770         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2771         if (sub_schurs->sum_S_Ej_all) {
2772           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2773           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2774           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2775           sub_schurs->sum_S_Ej_all = S_new;
2776           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2777         }
2778         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2779       }
2780     }
2781     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2782   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2783     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2784     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2785   }
2786 
2787   /* set up change of basis context */
2788   if (pcbddc->ChangeOfBasisMatrix) {
2789     PCBDDCChange_ctx change_ctx;
2790 
2791     if (!pcbddc->new_global_mat) {
2792       PetscInt global_size,local_size;
2793 
2794       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2795       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2796       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2797       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2798       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2799       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2800       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2801       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2802       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2803     } else {
2804       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2805       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2806       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2807     }
2808     if (!pcbddc->user_ChangeOfBasisMatrix) {
2809       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2810       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2811     } else {
2812       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2813       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2814     }
2815     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2816     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2817     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2818     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2819   }
2820 
2821   /* get indices in local ordering for vertices and constraints */
2822   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2823     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2824     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2825   }
2826   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2827   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2828   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2829   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2830   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2831   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2832   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2833   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2834   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2835   /* set quantities in PCBDDC data struct */
2836   pcbddc->n_actual_vertices = i;
2837   /* check if a new primal space has been introduced */
2838   pcbddc->new_primal_space_local = PETSC_TRUE;
2839   if (olocal_primal_size == pcbddc->local_primal_size) {
2840     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2841     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2842     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2843   }
2844   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2845   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2846 
2847   /* flush dbg viewer */
2848   if (pcbddc->dbg_flag) {
2849     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2850   }
2851 
2852   /* free workspace */
2853   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2854   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2855   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2856   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2857   if (!pcbddc->adaptive_selection) {
2858     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2859     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2860   } else {
2861     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2862                       pcbddc->adaptive_constraints_ptrs,
2863                       pcbddc->adaptive_constraints_idxs,
2864                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2865     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2866   }
2867   PetscFunctionReturn(0);
2868 }
2869 
2870 #undef __FUNCT__
2871 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2872 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2873 {
2874   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2875   PC_IS       *pcis = (PC_IS*)pc->data;
2876   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2877   PetscInt    ierr,i,vertex_size;
2878   PetscViewer viewer=pcbddc->dbg_viewer;
2879 
2880   PetscFunctionBegin;
2881   /* Reset previously computed graph */
2882   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2883   /* Init local Graph struct */
2884   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2885 
2886   /* Check validity of the csr graph passed in by the user */
2887   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2888     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2889   }
2890 
2891   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2892   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2893     PetscInt  *xadj,*adjncy;
2894     PetscInt  nvtxs;
2895     PetscBool flg_row=PETSC_FALSE;
2896 
2897     if (pcbddc->use_local_adj) {
2898 
2899       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2900       if (flg_row) {
2901         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2902         pcbddc->computed_rowadj = PETSC_TRUE;
2903       }
2904       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2905     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2906       IS                     is_dummy;
2907       ISLocalToGlobalMapping l2gmap_dummy;
2908       PetscInt               j,sum;
2909       PetscInt               *cxadj,*cadjncy;
2910       const PetscInt         *idxs;
2911       PCBDDCGraph            graph;
2912       PetscBT                is_on_boundary;
2913 
2914       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2915       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2916       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2917       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2918       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2919       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2920       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2921       if (flg_row) {
2922         graph->xadj = xadj;
2923         graph->adjncy = adjncy;
2924       }
2925       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2926       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2927       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2928 
2929       if (pcbddc->dbg_flag) {
2930         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2931         for (i=0;i<graph->ncc;i++) {
2932           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2933         }
2934         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2935       }
2936 
2937       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2938       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2939       for (i=0;i<pcis->n_B;i++) {
2940         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2941       }
2942       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2943 
2944       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2945       sum = 0;
2946       for (i=0;i<graph->ncc;i++) {
2947         PetscInt sizecc = 0;
2948         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2949           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2950             sizecc++;
2951           }
2952         }
2953         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2954           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2955             cxadj[graph->queue[j]] = sizecc;
2956           }
2957         }
2958         sum += sizecc*sizecc;
2959       }
2960       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2961       sum = 0;
2962       for (i=0;i<pcis->n;i++) {
2963         PetscInt temp = cxadj[i];
2964         cxadj[i] = sum;
2965         sum += temp;
2966       }
2967       cxadj[pcis->n] = sum;
2968       for (i=0;i<graph->ncc;i++) {
2969         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2970           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2971             PetscInt k,sizecc = 0;
2972             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2973               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2974                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2975                 sizecc++;
2976               }
2977             }
2978           }
2979         }
2980       }
2981       if (pcis->n) {
2982         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2983       } else {
2984         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2985         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2986       }
2987       graph->xadj = 0;
2988       graph->adjncy = 0;
2989       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2990       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2991     }
2992   }
2993 
2994   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2995   vertex_size = 1;
2996   if (pcbddc->user_provided_isfordofs) {
2997     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2998       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2999       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3000         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3001         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3002       }
3003       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3004       pcbddc->n_ISForDofs = 0;
3005       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3006     }
3007     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3008     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3009   } else {
3010     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3011       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3012       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3013       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3014         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3015       }
3016     }
3017   }
3018 
3019   /* Setup of Graph */
3020   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3021     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3022   }
3023   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3024     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3025   }
3026   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3027 
3028   /* Graph's connected components analysis */
3029   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3030 
3031   /* print some info to stdout */
3032   if (pcbddc->dbg_flag) {
3033     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3034   }
3035 
3036   /* mark topography has done */
3037   pcbddc->recompute_topography = PETSC_FALSE;
3038   PetscFunctionReturn(0);
3039 }
3040 
3041 #undef __FUNCT__
3042 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
3043 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
3044 {
3045   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3046   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
3047   PetscErrorCode ierr;
3048 
3049   PetscFunctionBegin;
3050   n = 0;
3051   vertices = 0;
3052   if (pcbddc->ConstraintMatrix) {
3053     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
3054     for (i=0;i<local_primal_size;i++) {
3055       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3056       if (size_of_constraint == 1) n++;
3057       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3058     }
3059     if (vertices_idx) {
3060       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3061       n = 0;
3062       for (i=0;i<local_primal_size;i++) {
3063         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3064         if (size_of_constraint == 1) {
3065           vertices[n++]=row_cmat_indices[0];
3066         }
3067         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3068       }
3069     }
3070   }
3071   *n_vertices = n;
3072   if (vertices_idx) *vertices_idx = vertices;
3073   PetscFunctionReturn(0);
3074 }
3075 
3076 #undef __FUNCT__
3077 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3078 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3079 {
3080   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3081   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3082   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3083   PetscBT        touched;
3084   PetscErrorCode ierr;
3085 
3086     /* This function assumes that the number of local constraints per connected component
3087        is not greater than the number of nodes defined for the connected component
3088        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3089   PetscFunctionBegin;
3090   n = 0;
3091   constraints_index = 0;
3092   if (pcbddc->ConstraintMatrix) {
3093     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3094     max_size_of_constraint = 0;
3095     for (i=0;i<local_primal_size;i++) {
3096       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3097       if (size_of_constraint > 1) {
3098         n++;
3099       }
3100       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3101       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3102     }
3103     if (constraints_idx) {
3104       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3105       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3106       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3107       n = 0;
3108       for (i=0;i<local_primal_size;i++) {
3109         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3110         if (size_of_constraint > 1) {
3111           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3112           /* find first untouched local node */
3113           j = 0;
3114           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3115           min_index = row_cmat_global_indices[j];
3116           min_loc = j;
3117           /* search the minimum among nodes not yet touched on the connected component
3118              since there can be more than one constraint on a single cc */
3119           for (j=1;j<size_of_constraint;j++) {
3120             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3121               min_index = row_cmat_global_indices[j];
3122               min_loc = j;
3123             }
3124           }
3125           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3126           constraints_index[n++] = row_cmat_indices[min_loc];
3127         }
3128         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3129       }
3130       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3131       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3132     }
3133   }
3134   *n_constraints = n;
3135   if (constraints_idx) *constraints_idx = constraints_index;
3136   PetscFunctionReturn(0);
3137 }
3138 
3139 #undef __FUNCT__
3140 #define __FUNCT__ "PCBDDCSubsetNumbering"
3141 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3142 {
3143   Vec            local_vec,global_vec;
3144   IS             seqis,paris;
3145   VecScatter     scatter_ctx;
3146   PetscScalar    *array;
3147   PetscInt       *temp_global_dofs;
3148   PetscScalar    globalsum;
3149   PetscInt       i,j,s;
3150   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3151   PetscMPIInt    rank_prec_comm,size_prec_comm;
3152   PetscInt       *dof_sizes,*dof_displs;
3153   PetscBool      first_found;
3154   PetscErrorCode ierr;
3155 
3156   PetscFunctionBegin;
3157   /* mpi buffers */
3158   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3159   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3160   j = ( !rank_prec_comm ? size_prec_comm : 0);
3161   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3162   /* get maximum size of subset */
3163   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3164   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3165   max_local = 0;
3166   for (i=0;i<n_local_dofs;i++) {
3167     if (max_local < temp_global_dofs[i] ) {
3168       max_local = temp_global_dofs[i];
3169     }
3170   }
3171   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3172   max_global++;
3173   max_local = 0;
3174   for (i=0;i<n_local_dofs;i++) {
3175     if (max_local < local_dofs[i] ) {
3176       max_local = local_dofs[i];
3177     }
3178   }
3179   max_local++;
3180   /* allocate workspace */
3181   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3182   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3183   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3184   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3185   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3186   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3187   /* create scatter */
3188   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3189   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3190   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3191   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3192   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3193   /* init array */
3194   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3195   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3196   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3197   if (local_dofs_mult) {
3198     for (i=0;i<n_local_dofs;i++) {
3199       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3200     }
3201   } else {
3202     for (i=0;i<n_local_dofs;i++) {
3203       array[local_dofs[i]]=1.0;
3204     }
3205   }
3206   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3207   /* scatter into global vec and get total number of global dofs */
3208   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3209   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3210   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3211   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3212   /* Fill global_vec with cumulative function for global numbering */
3213   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3214   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3215   nlocals = 0;
3216   first_index = -1;
3217   first_found = PETSC_FALSE;
3218   for (i=0;i<s;i++) {
3219     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3220       first_found = PETSC_TRUE;
3221       first_index = i;
3222     }
3223     nlocals += (PetscInt)PetscRealPart(array[i]);
3224   }
3225   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3226   if (!rank_prec_comm) {
3227     dof_displs[0]=0;
3228     for (i=1;i<size_prec_comm;i++) {
3229       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3230     }
3231   }
3232   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3233   if (first_found) {
3234     array[first_index] += (PetscScalar)nlocals;
3235     old_index = first_index;
3236     for (i=first_index+1;i<s;i++) {
3237       if (PetscRealPart(array[i]) > 0.1) {
3238         array[i] += array[old_index];
3239         old_index = i;
3240       }
3241     }
3242   }
3243   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3244   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3245   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3246   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3247   /* get global ordering of local dofs */
3248   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3249   if (local_dofs_mult) {
3250     for (i=0;i<n_local_dofs;i++) {
3251       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3252     }
3253   } else {
3254     for (i=0;i<n_local_dofs;i++) {
3255       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3256     }
3257   }
3258   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3259   /* free workspace */
3260   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3261   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3262   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3263   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3264   /* return pointer to global ordering of local dofs */
3265   *global_numbering_subset = temp_global_dofs;
3266   PetscFunctionReturn(0);
3267 }
3268 
3269 #undef __FUNCT__
3270 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3271 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3272 {
3273   PetscInt       i,j;
3274   PetscScalar    *alphas;
3275   PetscErrorCode ierr;
3276 
3277   PetscFunctionBegin;
3278   /* this implements stabilized Gram-Schmidt */
3279   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3280   for (i=0;i<n;i++) {
3281     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3282     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3283     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3284   }
3285   ierr = PetscFree(alphas);CHKERRQ(ierr);
3286   PetscFunctionReturn(0);
3287 }
3288 
3289 #undef __FUNCT__
3290 #define __FUNCT__ "MatISGetSubassemblingPattern"
3291 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3292 {
3293   Mat             subdomain_adj;
3294   IS              new_ranks,ranks_send_to;
3295   MatPartitioning partitioner;
3296   Mat_IS          *matis;
3297   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3298   PetscInt        prank;
3299   PetscMPIInt     size,rank,color;
3300   PetscInt        *xadj,*adjncy,*oldranks;
3301   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3302   PetscInt        i,local_size,threshold=0;
3303   PetscErrorCode  ierr;
3304   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3305   PetscSubcomm    subcomm;
3306 
3307   PetscFunctionBegin;
3308   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3309   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3310   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3311 
3312   /* Get info on mapping */
3313   matis = (Mat_IS*)(mat->data);
3314   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3315   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3316 
3317   /* build local CSR graph of subdomains' connectivity */
3318   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3319   xadj[0] = 0;
3320   xadj[1] = PetscMax(n_neighs-1,0);
3321   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3322   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3323 
3324   if (threshold) {
3325     PetscInt xadj_count = 0;
3326     for (i=1;i<n_neighs;i++) {
3327       if (n_shared[i] > threshold) {
3328         adjncy[xadj_count] = neighs[i];
3329         adjncy_wgt[xadj_count] = n_shared[i];
3330         xadj_count++;
3331       }
3332     }
3333     xadj[1] = xadj_count;
3334   } else {
3335     if (xadj[1]) {
3336       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3337       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3338     }
3339   }
3340   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3341   if (use_square) {
3342     for (i=0;i<xadj[1];i++) {
3343       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3344     }
3345   }
3346   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3347 
3348   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3349 
3350   /*
3351     Restrict work on active processes only.
3352   */
3353   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3354   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3355   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3356   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3357   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3358   if (color) {
3359     ierr = PetscFree(xadj);CHKERRQ(ierr);
3360     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3361     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3362   } else {
3363     PetscInt coarsening_ratio;
3364     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3365     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3366     prank = rank;
3367     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3368     /*
3369     for (i=0;i<size;i++) {
3370       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3371     }
3372     */
3373     for (i=0;i<xadj[1];i++) {
3374       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3375     }
3376     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3377     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3378     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3379 
3380     /* Partition */
3381     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3382     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3383     if (use_vwgt) {
3384       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3385       v_wgt[0] = local_size;
3386       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3387     }
3388     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3389     coarsening_ratio = size/n_subdomains;
3390     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3391     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3392     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3393     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3394 
3395     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3396     if (contiguous) {
3397       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3398     } else {
3399       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3400     }
3401     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3402     /* clean up */
3403     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3404     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3405     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3406     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3407   }
3408   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3409 
3410   /* assemble parallel IS for sends */
3411   i = 1;
3412   if (color) i=0;
3413   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3414 
3415   /* get back IS */
3416   *is_sends = ranks_send_to;
3417   PetscFunctionReturn(0);
3418 }
3419 
3420 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3421 
3422 #undef __FUNCT__
3423 #define __FUNCT__ "MatISSubassemble"
3424 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3425 {
3426   Mat                    local_mat;
3427   Mat_IS                 *matis;
3428   IS                     is_sends_internal;
3429   PetscInt               rows,cols,new_local_rows;
3430   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3431   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3432   ISLocalToGlobalMapping l2gmap;
3433   PetscInt*              l2gmap_indices;
3434   const PetscInt*        is_indices;
3435   MatType                new_local_type;
3436   /* buffers */
3437   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3438   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3439   PetscInt               *recv_buffer_idxs_local;
3440   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3441   /* MPI */
3442   MPI_Comm               comm,comm_n;
3443   PetscSubcomm           subcomm;
3444   PetscMPIInt            n_sends,n_recvs,commsize;
3445   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3446   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3447   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3448   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3449   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3450   PetscErrorCode         ierr;
3451 
3452   PetscFunctionBegin;
3453   /* TODO: add missing checks */
3454   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3455   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3456   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3457   PetscValidLogicalCollectiveInt(mat,nis,7);
3458   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3459   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3460   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3461   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3462   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3463   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3464   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3465   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3466     PetscInt mrows,mcols,mnrows,mncols;
3467     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3468     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3469     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3470     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3471     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3472     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3473   }
3474   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3475   PetscValidLogicalCollectiveInt(mat,bs,0);
3476   /* prepare IS for sending if not provided */
3477   if (!is_sends) {
3478     PetscBool pcontig = PETSC_TRUE;
3479     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3480     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3481   } else {
3482     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3483     is_sends_internal = is_sends;
3484   }
3485 
3486   /* get pointer of MATIS data */
3487   matis = (Mat_IS*)mat->data;
3488 
3489   /* get comm */
3490   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3491 
3492   /* compute number of sends */
3493   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3494   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3495 
3496   /* compute number of receives */
3497   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3498   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3499   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3500   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3501   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3502   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3503   ierr = PetscFree(iflags);CHKERRQ(ierr);
3504 
3505   /* restrict comm if requested */
3506   subcomm = 0;
3507   destroy_mat = PETSC_FALSE;
3508   if (restrict_comm) {
3509     PetscMPIInt color,subcommsize;
3510 
3511     color = 0;
3512     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3513     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3514     subcommsize = commsize - subcommsize;
3515     /* check if reuse has been requested */
3516     if (reuse == MAT_REUSE_MATRIX) {
3517       if (*mat_n) {
3518         PetscMPIInt subcommsize2;
3519         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3520         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3521         comm_n = PetscObjectComm((PetscObject)*mat_n);
3522       } else {
3523         comm_n = PETSC_COMM_SELF;
3524       }
3525     } else { /* MAT_INITIAL_MATRIX */
3526       PetscMPIInt rank;
3527 
3528       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3529       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3530       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3531       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3532       comm_n = PetscSubcommChild(subcomm);
3533     }
3534     /* flag to destroy *mat_n if not significative */
3535     if (color) destroy_mat = PETSC_TRUE;
3536   } else {
3537     comm_n = comm;
3538   }
3539 
3540   /* prepare send/receive buffers */
3541   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3542   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3543   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3544   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3545   if (nis) {
3546     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3547   }
3548 
3549   /* Get data from local matrices */
3550   if (!isdense) {
3551     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3552     /* TODO: See below some guidelines on how to prepare the local buffers */
3553     /*
3554        send_buffer_vals should contain the raw values of the local matrix
3555        send_buffer_idxs should contain:
3556        - MatType_PRIVATE type
3557        - PetscInt        size_of_l2gmap
3558        - PetscInt        global_row_indices[size_of_l2gmap]
3559        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3560     */
3561   } else {
3562     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3563     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3564     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3565     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3566     send_buffer_idxs[1] = i;
3567     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3568     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3569     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3570     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3571     for (i=0;i<n_sends;i++) {
3572       ilengths_vals[is_indices[i]] = len*len;
3573       ilengths_idxs[is_indices[i]] = len+2;
3574     }
3575   }
3576   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3577   /* additional is (if any) */
3578   if (nis) {
3579     PetscMPIInt psum;
3580     PetscInt j;
3581     for (j=0,psum=0;j<nis;j++) {
3582       PetscInt plen;
3583       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3584       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3585       psum += len+1; /* indices + lenght */
3586     }
3587     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3588     for (j=0,psum=0;j<nis;j++) {
3589       PetscInt plen;
3590       const PetscInt *is_array_idxs;
3591       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3592       send_buffer_idxs_is[psum] = plen;
3593       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3594       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3595       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3596       psum += plen+1; /* indices + lenght */
3597     }
3598     for (i=0;i<n_sends;i++) {
3599       ilengths_idxs_is[is_indices[i]] = psum;
3600     }
3601     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3602   }
3603 
3604   buf_size_idxs = 0;
3605   buf_size_vals = 0;
3606   buf_size_idxs_is = 0;
3607   for (i=0;i<n_recvs;i++) {
3608     buf_size_idxs += (PetscInt)olengths_idxs[i];
3609     buf_size_vals += (PetscInt)olengths_vals[i];
3610     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3611   }
3612   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3613   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3614   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3615 
3616   /* get new tags for clean communications */
3617   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3618   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3619   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3620 
3621   /* allocate for requests */
3622   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3623   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3624   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3625   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3626   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3627   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3628 
3629   /* communications */
3630   ptr_idxs = recv_buffer_idxs;
3631   ptr_vals = recv_buffer_vals;
3632   ptr_idxs_is = recv_buffer_idxs_is;
3633   for (i=0;i<n_recvs;i++) {
3634     source_dest = onodes[i];
3635     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3636     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3637     ptr_idxs += olengths_idxs[i];
3638     ptr_vals += olengths_vals[i];
3639     if (nis) {
3640       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3641       ptr_idxs_is += olengths_idxs_is[i];
3642     }
3643   }
3644   for (i=0;i<n_sends;i++) {
3645     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3646     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3647     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3648     if (nis) {
3649       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3650     }
3651   }
3652   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3653   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3654 
3655   /* assemble new l2g map */
3656   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3657   ptr_idxs = recv_buffer_idxs;
3658   new_local_rows = 0;
3659   for (i=0;i<n_recvs;i++) {
3660     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3661     ptr_idxs += olengths_idxs[i];
3662   }
3663   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3664   ptr_idxs = recv_buffer_idxs;
3665   new_local_rows = 0;
3666   for (i=0;i<n_recvs;i++) {
3667     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3668     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3669     ptr_idxs += olengths_idxs[i];
3670   }
3671   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3672   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3673   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3674 
3675   /* infer new local matrix type from received local matrices type */
3676   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3677   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3678   if (n_recvs) {
3679     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3680     ptr_idxs = recv_buffer_idxs;
3681     for (i=0;i<n_recvs;i++) {
3682       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3683         new_local_type_private = MATAIJ_PRIVATE;
3684         break;
3685       }
3686       ptr_idxs += olengths_idxs[i];
3687     }
3688     switch (new_local_type_private) {
3689       case MATDENSE_PRIVATE:
3690         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3691           new_local_type = MATSEQAIJ;
3692           bs = 1;
3693         } else { /* if I receive only 1 dense matrix */
3694           new_local_type = MATSEQDENSE;
3695           bs = 1;
3696         }
3697         break;
3698       case MATAIJ_PRIVATE:
3699         new_local_type = MATSEQAIJ;
3700         bs = 1;
3701         break;
3702       case MATBAIJ_PRIVATE:
3703         new_local_type = MATSEQBAIJ;
3704         break;
3705       case MATSBAIJ_PRIVATE:
3706         new_local_type = MATSEQSBAIJ;
3707         break;
3708       default:
3709         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3710         break;
3711     }
3712   } else { /* by default, new_local_type is seqdense */
3713     new_local_type = MATSEQDENSE;
3714     bs = 1;
3715   }
3716 
3717   /* create MATIS object if needed */
3718   if (reuse == MAT_INITIAL_MATRIX) {
3719     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3720     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3721   } else {
3722     /* it also destroys the local matrices */
3723     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3724   }
3725   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3726   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3727 
3728   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3729 
3730   /* Global to local map of received indices */
3731   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3732   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3733   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3734 
3735   /* restore attributes -> type of incoming data and its size */
3736   buf_size_idxs = 0;
3737   for (i=0;i<n_recvs;i++) {
3738     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3739     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3740     buf_size_idxs += (PetscInt)olengths_idxs[i];
3741   }
3742   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3743 
3744   /* set preallocation */
3745   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3746   if (!newisdense) {
3747     PetscInt *new_local_nnz=0;
3748 
3749     ptr_vals = recv_buffer_vals;
3750     ptr_idxs = recv_buffer_idxs_local;
3751     if (n_recvs) {
3752       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3753     }
3754     for (i=0;i<n_recvs;i++) {
3755       PetscInt j;
3756       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3757         for (j=0;j<*(ptr_idxs+1);j++) {
3758           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3759         }
3760       } else {
3761         /* TODO */
3762       }
3763       ptr_idxs += olengths_idxs[i];
3764     }
3765     if (new_local_nnz) {
3766       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3767       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3768       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3769       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3770       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3771       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3772     } else {
3773       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3774     }
3775     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3776   } else {
3777     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3778   }
3779 
3780   /* set values */
3781   ptr_vals = recv_buffer_vals;
3782   ptr_idxs = recv_buffer_idxs_local;
3783   for (i=0;i<n_recvs;i++) {
3784     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3785       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3786       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3787       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3788       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3789       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3790     } else {
3791       /* TODO */
3792     }
3793     ptr_idxs += olengths_idxs[i];
3794     ptr_vals += olengths_vals[i];
3795   }
3796   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3797   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3798   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3799   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3800   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3801   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3802 
3803 #if 0
3804   if (!restrict_comm) { /* check */
3805     Vec       lvec,rvec;
3806     PetscReal infty_error;
3807 
3808     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3809     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3810     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3811     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3812     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3813     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3814     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3815     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3816     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3817   }
3818 #endif
3819 
3820   /* assemble new additional is (if any) */
3821   if (nis) {
3822     PetscInt **temp_idxs,*count_is,j,psum;
3823 
3824     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3825     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3826     ptr_idxs = recv_buffer_idxs_is;
3827     psum = 0;
3828     for (i=0;i<n_recvs;i++) {
3829       for (j=0;j<nis;j++) {
3830         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3831         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3832         psum += plen;
3833         ptr_idxs += plen+1; /* shift pointer to received data */
3834       }
3835     }
3836     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3837     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3838     for (i=1;i<nis;i++) {
3839       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3840     }
3841     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3842     ptr_idxs = recv_buffer_idxs_is;
3843     for (i=0;i<n_recvs;i++) {
3844       for (j=0;j<nis;j++) {
3845         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3846         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3847         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3848         ptr_idxs += plen+1; /* shift pointer to received data */
3849       }
3850     }
3851     for (i=0;i<nis;i++) {
3852       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3853       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3854       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3855     }
3856     ierr = PetscFree(count_is);CHKERRQ(ierr);
3857     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3858     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3859   }
3860   /* free workspace */
3861   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3862   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3863   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3864   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3865   if (isdense) {
3866     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3867     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3868   } else {
3869     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3870   }
3871   if (nis) {
3872     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3873     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3874   }
3875   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3876   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3877   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3878   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3879   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3880   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3881   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3882   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3883   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3884   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3885   ierr = PetscFree(onodes);CHKERRQ(ierr);
3886   if (nis) {
3887     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3888     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3889     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3890   }
3891   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3892   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3893     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3894     for (i=0;i<nis;i++) {
3895       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3896     }
3897   }
3898   PetscFunctionReturn(0);
3899 }
3900 
3901 /* temporary hack into ksp private data structure */
3902 #include <petsc/private/kspimpl.h>
3903 
3904 #undef __FUNCT__
3905 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3906 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3907 {
3908   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3909   PC_IS                  *pcis = (PC_IS*)pc->data;
3910   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3911   MatNullSpace           CoarseNullSpace=NULL;
3912   ISLocalToGlobalMapping coarse_islg;
3913   IS                     coarse_is,*isarray;
3914   PetscInt               i,im_active=-1,active_procs=-1;
3915   PetscInt               nis,nisdofs,nisneu;
3916   PC                     pc_temp;
3917   PCType                 coarse_pc_type;
3918   KSPType                coarse_ksp_type;
3919   PetscBool              multilevel_requested,multilevel_allowed;
3920   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3921   Mat                    t_coarse_mat_is;
3922   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3923   PetscMPIInt            all_procs;
3924   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3925   PetscBool              compute_vecs = PETSC_FALSE;
3926   PetscScalar            *array;
3927   PetscErrorCode         ierr;
3928 
3929   PetscFunctionBegin;
3930   /* Assign global numbering to coarse dofs */
3931   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3932     PetscInt ocoarse_size;
3933     compute_vecs = PETSC_TRUE;
3934     ocoarse_size = pcbddc->coarse_size;
3935     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3936     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3937     /* see if we can avoid some work */
3938     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3939       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3940         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3941         coarse_reuse = PETSC_FALSE;
3942       } else { /* we can safely reuse already computed coarse matrix */
3943         coarse_reuse = PETSC_TRUE;
3944       }
3945     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3946       coarse_reuse = PETSC_FALSE;
3947     }
3948     /* reset any subassembling information */
3949     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3950     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3951   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3952     coarse_reuse = PETSC_TRUE;
3953   }
3954 
3955   /* count "active" (i.e. with positive local size) and "void" processes */
3956   im_active = !!(pcis->n);
3957   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3958   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3959   void_procs = all_procs-active_procs;
3960   csin_type_simple = PETSC_TRUE;
3961   redist = PETSC_FALSE;
3962   if (pcbddc->current_level && void_procs) {
3963     csin_ml = PETSC_TRUE;
3964     ncoarse_ml = void_procs;
3965     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3966     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3967       csin_ds = PETSC_TRUE;
3968       ncoarse_ds = pcbddc->redistribute_coarse;
3969       redist = PETSC_TRUE;
3970     } else {
3971       csin_ds = PETSC_TRUE;
3972       ncoarse_ds = active_procs;
3973       redist = PETSC_TRUE;
3974     }
3975   } else {
3976     csin_ml = PETSC_FALSE;
3977     ncoarse_ml = all_procs;
3978     if (void_procs) {
3979       csin_ds = PETSC_TRUE;
3980       ncoarse_ds = void_procs;
3981       csin_type_simple = PETSC_FALSE;
3982     } else {
3983       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3984         csin_ds = PETSC_TRUE;
3985         ncoarse_ds = pcbddc->redistribute_coarse;
3986         redist = PETSC_TRUE;
3987       } else {
3988         csin_ds = PETSC_FALSE;
3989         ncoarse_ds = all_procs;
3990       }
3991     }
3992   }
3993 
3994   /*
3995     test if we can go multilevel: three conditions must be satisfied:
3996     - we have not exceeded the number of levels requested
3997     - we can actually subassemble the active processes
3998     - we can find a suitable number of MPI processes where we can place the subassembled problem
3999   */
4000   multilevel_allowed = PETSC_FALSE;
4001   multilevel_requested = PETSC_FALSE;
4002   if (pcbddc->current_level < pcbddc->max_levels) {
4003     multilevel_requested = PETSC_TRUE;
4004     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4005       multilevel_allowed = PETSC_FALSE;
4006     } else {
4007       multilevel_allowed = PETSC_TRUE;
4008     }
4009   }
4010   /* determine number of process partecipating to coarse solver */
4011   if (multilevel_allowed) {
4012     ncoarse = ncoarse_ml;
4013     csin = csin_ml;
4014     redist = PETSC_FALSE;
4015   } else {
4016     ncoarse = ncoarse_ds;
4017     csin = csin_ds;
4018   }
4019 
4020   /* creates temporary l2gmap and IS for coarse indexes */
4021   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4022   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4023 
4024   /* creates temporary MATIS object for coarse matrix */
4025   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4026   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4027   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4028   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4029 #if 0
4030   {
4031     PetscViewer viewer;
4032     char filename[256];
4033     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4034     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4035     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4036     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4037     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4038   }
4039 #endif
4040   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4041   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4042   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4043   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4044   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4045 
4046   /* compute dofs splitting and neumann boundaries for coarse dofs */
4047   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4048     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4049     const PetscInt         *idxs;
4050     ISLocalToGlobalMapping tmap;
4051 
4052     /* create map between primal indices (in local representative ordering) and local primal numbering */
4053     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4054     /* allocate space for temporary storage */
4055     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4056     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4057     /* allocate for IS array */
4058     nisdofs = pcbddc->n_ISForDofsLocal;
4059     nisneu = !!pcbddc->NeumannBoundariesLocal;
4060     nis = nisdofs + nisneu;
4061     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4062     /* dofs splitting */
4063     for (i=0;i<nisdofs;i++) {
4064       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4065       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4066       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4067       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4068       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4069       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4070       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4071       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4072     }
4073     /* neumann boundaries */
4074     if (pcbddc->NeumannBoundariesLocal) {
4075       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4076       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4077       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4078       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4079       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4080       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4081       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4082       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4083     }
4084     /* free memory */
4085     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4086     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4087     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4088   } else {
4089     nis = 0;
4090     nisdofs = 0;
4091     nisneu = 0;
4092     isarray = NULL;
4093   }
4094   /* destroy no longer needed map */
4095   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4096 
4097   /* restrict on coarse candidates (if needed) */
4098   coarse_mat_is = NULL;
4099   if (csin) {
4100     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4101       if (redist) {
4102         PetscMPIInt rank;
4103         PetscInt    spc,n_spc_p1,dest[1],destsize;
4104 
4105         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4106         spc = active_procs/ncoarse;
4107         n_spc_p1 = active_procs%ncoarse;
4108         if (im_active) {
4109           destsize = 1;
4110           if (rank > n_spc_p1*(spc+1)-1) {
4111             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4112           } else {
4113             dest[0] = rank/(spc+1);
4114           }
4115         } else {
4116           destsize = 0;
4117         }
4118         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4119       } else if (csin_type_simple) {
4120         PetscMPIInt rank;
4121         PetscInt    issize,isidx;
4122 
4123         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4124         if (im_active) {
4125           issize = 1;
4126           isidx = (PetscInt)rank;
4127         } else {
4128           issize = 0;
4129           isidx = -1;
4130         }
4131         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4132       } else { /* get a suitable subassembling pattern from MATIS code */
4133         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4134       }
4135 
4136       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4137       if (!redist || ncoarse <= void_procs) {
4138         PetscInt ncoarse_cand,tissize,*nisindices;
4139         PetscInt *coarse_candidates;
4140         const PetscInt* tisindices;
4141 
4142         /* get coarse candidates' ranks in pc communicator */
4143         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4144         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4145         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4146           if (!coarse_candidates[i]) {
4147             coarse_candidates[ncoarse_cand++]=i;
4148           }
4149         }
4150         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4151 
4152 
4153         if (pcbddc->dbg_flag) {
4154           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4155           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4156           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4157           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4158           for (i=0;i<ncoarse_cand;i++) {
4159             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4160           }
4161           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4162           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4163         }
4164         /* shift the pattern on coarse candidates */
4165         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4166         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4167         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4168         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4169         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4170         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4171         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4172       }
4173       if (pcbddc->dbg_flag) {
4174         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4175         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4176         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4177         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4178       }
4179     }
4180     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4181     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4182   } else {
4183     if (pcbddc->dbg_flag) {
4184       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4185       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4186       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4187     }
4188     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4189     coarse_mat_is = t_coarse_mat_is;
4190   }
4191 
4192   /* create local to global scatters for coarse problem */
4193   if (compute_vecs) {
4194     PetscInt lrows;
4195     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4196     if (coarse_mat_is) {
4197       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4198     } else {
4199       lrows = 0;
4200     }
4201     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4202     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4203     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4204     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4205     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4206   }
4207   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4208   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4209 
4210   /* set defaults for coarse KSP and PC */
4211   if (multilevel_allowed) {
4212     coarse_ksp_type = KSPRICHARDSON;
4213     coarse_pc_type = PCBDDC;
4214   } else {
4215     coarse_ksp_type = KSPPREONLY;
4216     coarse_pc_type = PCREDUNDANT;
4217   }
4218 
4219   /* print some info if requested */
4220   if (pcbddc->dbg_flag) {
4221     if (!multilevel_allowed) {
4222       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4223       if (multilevel_requested) {
4224         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4225       } else if (pcbddc->max_levels) {
4226         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4227       }
4228       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4229     }
4230   }
4231 
4232   /* create the coarse KSP object only once with defaults */
4233   if (coarse_mat_is) {
4234     MatReuse coarse_mat_reuse;
4235     PetscViewer dbg_viewer = NULL;
4236     if (pcbddc->dbg_flag) {
4237       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4238       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4239     }
4240     if (!pcbddc->coarse_ksp) {
4241       char prefix[256],str_level[16];
4242       size_t len;
4243       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4244       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4245       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4246       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4247       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4248       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4249       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4250       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4251       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4252       /* prefix */
4253       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4254       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4255       if (!pcbddc->current_level) {
4256         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4257         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4258       } else {
4259         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4260         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4261         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4262         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4263         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4264         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4265       }
4266       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4267       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4268       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4269       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4270       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4271       /* allow user customization */
4272       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4273     }
4274     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4275     if (nisdofs) {
4276       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4277       for (i=0;i<nisdofs;i++) {
4278         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4279       }
4280     }
4281     if (nisneu) {
4282       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4283       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4284     }
4285 
4286     /* get some info after set from options */
4287     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4288     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4289     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4290     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4291     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4292       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4293       isbddc = PETSC_FALSE;
4294     }
4295     if (isredundant) {
4296       KSP inner_ksp;
4297       PC inner_pc;
4298       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4299       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4300       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4301     }
4302 
4303     /* assemble coarse matrix */
4304     if (coarse_reuse) {
4305       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4306       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4307       coarse_mat_reuse = MAT_REUSE_MATRIX;
4308     } else {
4309       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4310     }
4311     if (isbddc || isnn) {
4312       if (pcbddc->coarsening_ratio > 1) {
4313         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4314           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4315           if (pcbddc->dbg_flag) {
4316             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4317             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4318             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4319             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4320           }
4321         }
4322         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4323       } else {
4324         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4325         coarse_mat = coarse_mat_is;
4326       }
4327     } else {
4328       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4329     }
4330     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4331 
4332     /* propagate symmetry info to coarse matrix */
4333     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4334     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4335 
4336     /* set operators */
4337     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4338     if (pcbddc->dbg_flag) {
4339       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4340     }
4341   } else { /* processes non partecipating to coarse solver (if any) */
4342     coarse_mat = 0;
4343   }
4344   ierr = PetscFree(isarray);CHKERRQ(ierr);
4345 #if 0
4346   {
4347     PetscViewer viewer;
4348     char filename[256];
4349     sprintf(filename,"coarse_mat.m");
4350     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4351     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4352     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4353     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4354   }
4355 #endif
4356 
4357   /* Compute coarse null space (special handling by BDDC only) */
4358   if (pcbddc->NullSpace) {
4359     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4360   }
4361 
4362   if (pcbddc->coarse_ksp) {
4363     Vec crhs,csol;
4364     PetscBool ispreonly;
4365     if (CoarseNullSpace) {
4366       if (isbddc) {
4367         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4368       } else {
4369         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4370       }
4371     }
4372     /* setup coarse ksp */
4373     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4374     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4375     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4376     /* hack */
4377     if (!csol) {
4378       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4379     }
4380     if (!crhs) {
4381       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4382     }
4383     /* Check coarse problem if in debug mode or if solving with an iterative method */
4384     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4385     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4386       KSP       check_ksp;
4387       KSPType   check_ksp_type;
4388       PC        check_pc;
4389       Vec       check_vec,coarse_vec;
4390       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4391       PetscInt  its;
4392       PetscBool compute_eigs;
4393       PetscReal *eigs_r,*eigs_c;
4394       PetscInt  neigs;
4395       const char *prefix;
4396 
4397       /* Create ksp object suitable for estimation of extreme eigenvalues */
4398       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4399       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4400       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4401       if (ispreonly) {
4402         check_ksp_type = KSPPREONLY;
4403         compute_eigs = PETSC_FALSE;
4404       } else {
4405         check_ksp_type = KSPGMRES;
4406         compute_eigs = PETSC_TRUE;
4407       }
4408       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4409       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4410       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4411       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4412       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4413       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4414       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4415       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4416       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4417       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4418       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4419       /* create random vec */
4420       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4421       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4422       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4423       if (CoarseNullSpace) {
4424         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4425       }
4426       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4427       /* solve coarse problem */
4428       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4429       if (CoarseNullSpace) {
4430         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4431       }
4432       /* set eigenvalue estimation if preonly has not been requested */
4433       if (compute_eigs) {
4434         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4435         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4436         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4437         lambda_max = eigs_r[neigs-1];
4438         lambda_min = eigs_r[0];
4439         if (pcbddc->use_coarse_estimates) {
4440           if (lambda_max>lambda_min) {
4441             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4442             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4443           }
4444         }
4445       }
4446 
4447       /* check coarse problem residual error */
4448       if (pcbddc->dbg_flag) {
4449         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4450         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4451         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4452         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4453         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4454         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4455         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4456         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4457         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4458         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4459         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4460         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4461         if (compute_eigs) {
4462           PetscReal lambda_max_s,lambda_min_s;
4463           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4464           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4465           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4466           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4467           for (i=0;i<neigs;i++) {
4468             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4469           }
4470         }
4471         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4472         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4473       }
4474       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4475       if (compute_eigs) {
4476         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4477         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4478       }
4479     }
4480   }
4481   /* print additional info */
4482   if (pcbddc->dbg_flag) {
4483     /* waits until all processes reaches this point */
4484     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4485     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4486     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4487   }
4488 
4489   /* free memory */
4490   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4491   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4492   PetscFunctionReturn(0);
4493 }
4494 
4495 #undef __FUNCT__
4496 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4497 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4498 {
4499   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4500   PC_IS*         pcis = (PC_IS*)pc->data;
4501   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4502   PetscInt       i,coarse_size=0;
4503   PetscInt       *local_primal_indices=NULL;
4504   PetscErrorCode ierr;
4505 
4506   PetscFunctionBegin;
4507   /* Compute global number of coarse dofs */
4508   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4509     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4510   }
4511   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4512 
4513   /* check numbering */
4514   if (pcbddc->dbg_flag) {
4515     PetscScalar coarsesum,*array;
4516     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4517 
4518     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4519     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4520     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4521     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4522     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4523     for (i=0;i<pcbddc->local_primal_size;i++) {
4524       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4525     }
4526     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4527     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4528     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4529     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4530     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4531     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4532     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4533     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4534     for (i=0;i<pcis->n;i++) {
4535       if (array[i] == 1.0) {
4536         set_error = PETSC_TRUE;
4537         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4538       }
4539     }
4540     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4541     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4542     for (i=0;i<pcis->n;i++) {
4543       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4544     }
4545     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4546     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4547     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4548     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4549     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4550     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4551     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4552       PetscInt *gidxs;
4553 
4554       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4555       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4556       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4557       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4558       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4559       for (i=0;i<pcbddc->local_primal_size;i++) {
4560         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4561       }
4562       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4563       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4564     }
4565     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4566     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4567   }
4568   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4569   /* get back data */
4570   *coarse_size_n = coarse_size;
4571   *local_primal_indices_n = local_primal_indices;
4572   PetscFunctionReturn(0);
4573 }
4574 
4575 #undef __FUNCT__
4576 #define __FUNCT__ "PCBDDCGlobalToLocal"
4577 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4578 {
4579   IS             localis_t;
4580   PetscInt       i,lsize,*idxs,n;
4581   PetscScalar    *vals;
4582   PetscErrorCode ierr;
4583 
4584   PetscFunctionBegin;
4585   /* get indices in local ordering exploiting local to global map */
4586   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4587   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4588   for (i=0;i<lsize;i++) vals[i] = 1.0;
4589   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4590   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4591   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4592   if (idxs) { /* multilevel guard */
4593     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4594   }
4595   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4596   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4597   ierr = PetscFree(vals);CHKERRQ(ierr);
4598   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4599   /* now compute set in local ordering */
4600   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4601   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4602   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4603   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4604   for (i=0,lsize=0;i<n;i++) {
4605     if (PetscRealPart(vals[i]) > 0.5) {
4606       lsize++;
4607     }
4608   }
4609   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4610   for (i=0,lsize=0;i<n;i++) {
4611     if (PetscRealPart(vals[i]) > 0.5) {
4612       idxs[lsize++] = i;
4613     }
4614   }
4615   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4616   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4617   *localis = localis_t;
4618   PetscFunctionReturn(0);
4619 }
4620 
4621 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4622 #undef __FUNCT__
4623 #define __FUNCT__ "PCBDDCMatMult_Private"
4624 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4625 {
4626   PCBDDCChange_ctx change_ctx;
4627   PetscErrorCode   ierr;
4628 
4629   PetscFunctionBegin;
4630   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4631   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4632   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4633   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4634   PetscFunctionReturn(0);
4635 }
4636 
4637 #undef __FUNCT__
4638 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4639 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4640 {
4641   PCBDDCChange_ctx change_ctx;
4642   PetscErrorCode   ierr;
4643 
4644   PetscFunctionBegin;
4645   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4646   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4647   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4648   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4649   PetscFunctionReturn(0);
4650 }
4651 
4652 #undef __FUNCT__
4653 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4654 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4655 {
4656   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4657   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4658   PetscInt            *used_xadj,*used_adjncy;
4659   PetscBool           free_used_adj;
4660   PetscErrorCode      ierr;
4661 
4662   PetscFunctionBegin;
4663   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4664   free_used_adj = PETSC_FALSE;
4665   if (pcbddc->sub_schurs_layers == -1) {
4666     used_xadj = NULL;
4667     used_adjncy = NULL;
4668   } else {
4669     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4670       used_xadj = pcbddc->mat_graph->xadj;
4671       used_adjncy = pcbddc->mat_graph->adjncy;
4672     } else if (pcbddc->computed_rowadj) {
4673       used_xadj = pcbddc->mat_graph->xadj;
4674       used_adjncy = pcbddc->mat_graph->adjncy;
4675     } else {
4676       PetscBool      flg_row=PETSC_FALSE;
4677       const PetscInt *xadj,*adjncy;
4678       PetscInt       nvtxs;
4679 
4680       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4681       if (flg_row) {
4682         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4683         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4684         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4685         free_used_adj = PETSC_TRUE;
4686       } else {
4687         pcbddc->sub_schurs_layers = -1;
4688         used_xadj = NULL;
4689         used_adjncy = NULL;
4690       }
4691       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4692     }
4693   }
4694 
4695   /* set Schur complement solver for interior variables (used only when MUMPS is not present) */
4696   if (!sub_schurs->use_mumps) {
4697     ierr = MatSchurComplementSetKSP(sub_schurs->S,pcbddc->ksp_D);CHKERRQ(ierr);
4698   }
4699 
4700   /* setup sub_schurs data */
4701   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4702 
4703   /* free adjacency */
4704   if (free_used_adj) {
4705     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4706   }
4707   PetscFunctionReturn(0);
4708 }
4709 
4710 #undef __FUNCT__
4711 #define __FUNCT__ "PCBDDCInitSubSchurs"
4712 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4713 {
4714   PC_IS               *pcis=(PC_IS*)pc->data;
4715   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4716   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4717   PCBDDCGraph         graph;
4718   Mat                 S_j;
4719   PetscErrorCode      ierr;
4720 
4721   PetscFunctionBegin;
4722   /* attach interface graph for determining subsets */
4723   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4724     IS verticesIS;
4725 
4726     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4727     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4728     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4729     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4730     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4731     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4732 /*
4733     if (pcbddc->dbg_flag) {
4734       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4735     }
4736 */
4737   } else {
4738     graph = pcbddc->mat_graph;
4739   }
4740 
4741   /* Create Schur complement matrix */
4742   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4743 
4744   /* sub_schurs init */
4745   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4746   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4747   /* free graph struct */
4748   if (pcbddc->sub_schurs_rebuild) {
4749     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4750   }
4751   PetscFunctionReturn(0);
4752 }
4753