xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision ca8b9ea9d398bfc6e3d079bac51f7510782e32e7)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
60   nmax = PetscMax(nmin,nmax);
61   if (pcbddc->adaptive_threshold > 1.0) {
62     thresh = 1.0/pcbddc->adaptive_threshold;
63   } else {
64     thresh = 1.0;
65   }
66 
67   /* allocate lapack workspace */
68   cum = cum2 = 0;
69   maxneigs = 0;
70   for (i=0;i<sub_schurs->n_subs;i++) {
71     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
72       PetscInt n,subset_size;
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size*n;
76       cum2 += n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80 
81   if (mss) {
82     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
83       PetscBLASInt B_itype = 1;
84       PetscBLASInt B_N = mss;
85       PetscReal    zero = 0.0;
86       PetscReal    eps = 0.0; /* dlamch? */
87 
88       B_lwork = -1;
89       S = NULL;
90       St = NULL;
91       eigs = NULL;
92       eigv = NULL;
93       B_iwork = NULL;
94       B_ifail = NULL;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
112     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
116                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
117 #if defined(PETSC_USE_COMPLEX)
118   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
119 #endif
120   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
121   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
122                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
123                       nv+cum,&pcbddc->adaptive_constraints_idxs,
124                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
125   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
126 
127   maxneigs = 0;
128   cum = cum2 = cumarray = 0;
129   if (sub_schurs->is_Ej_com) {
130     const PetscInt *idxs;
131 
132     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
133     for (cum=0;cum<nv;cum++) {
134       pcbddc->adaptive_constraints_n[cum] = 1;
135       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
136       pcbddc->adaptive_constraints_ptrs[cum] = cum;
137       pcbddc->adaptive_constraints_data[cum] = 1.0;
138     }
139     cum2 = cum;
140     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
141   }
142 
143   if (mss) { /* multilevel */
144     if (pcbddc->use_deluxe_scaling) {
145       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
146     } else {
147       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
148     }
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
150   }
151 
152   for (i=0;i<sub_schurs->n_subs;i++) {
153     PetscInt j,subset_size;
154 
155     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
156     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
157       const PetscInt *idxs;
158       PetscScalar    one = 1.0,scalar_zero = 0.0;
159       PetscReal      zero=0.0;
160       PetscBLASInt   B_N;
161 
162       /* S should be copied since we need it for deluxe scaling */
163       if (sub_schurs->is_hermitian) {
164         PetscInt j;
165         for (j=0;j<subset_size;j++) {
166           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
167         }
168         for (j=0;j<subset_size;j++) {
169           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
170         }
171       } else {
172         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
173         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
174       }
175       /* is always this the right matrix? */
176       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
177 
178       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
179       /* St = Starray+cumarray; */
180 
181       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
182         PetscBLASInt B_itype = 1;
183         PetscBLASInt B_IL = 1, B_IU;
184         PetscReal    eps = -1.0; /* dlamch? */
185         PetscInt     nmin_s;
186 
187         /* ask for eigenvalues lower than thresh */
188         if (pcbddc->dbg_flag) {
189           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d.\n",i,sub_schurs->n_subs);
190         }
191         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
192         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
193 #if defined(PETSC_USE_COMPLEX)
194         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
195 #else
196         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
197 #endif
198         ierr = PetscFPTrapPop();CHKERRQ(ierr);
199         if (B_ierr) {
200           if (B_ierr < 0 ) {
201             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
202           } else if (B_ierr <= B_N) {
203             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
204           } else {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
206           }
207         }
208 
209         if (B_neigs > nmax) {
210           if (pcbddc->dbg_flag) {
211             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
212           }
213           B_neigs = nmax;
214         }
215 
216         nmin_s = PetscMin(nmin,B_N);
217         if (B_neigs < nmin_s) {
218           PetscBLASInt B_neigs2;
219 
220           B_IL = B_neigs + 1;
221           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
222           if (pcbddc->dbg_flag) {
223             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
224           }
225           if (sub_schurs->is_hermitian) {
226             PetscInt j;
227             for (j=0;j<subset_size;j++) {
228               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
229             }
230             for (j=0;j<subset_size;j++) {
231               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
232             }
233           } else {
234             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
235             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
236           }
237           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
238 #if defined(PETSC_USE_COMPLEX)
239           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
240 #else
241           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
242 #endif
243           ierr = PetscFPTrapPop();CHKERRQ(ierr);
244           B_neigs += B_neigs2;
245         }
246         if (B_ierr) {
247           if (B_ierr < 0 ) {
248             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
249           } else if (B_ierr <= B_N) {
250             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
251           } else {
252             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
253           }
254         }
255         if (pcbddc->dbg_flag) {
256           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
257           for (j=0;j<B_neigs;j++) {
258             if (eigs[j] == 0.0) {
259               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
260             } else {
261               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1.0/eigs[j]);
262             }
263           }
264         }
265       } else {
266           /* TODO */
267       }
268       maxneigs = PetscMax(B_neigs,maxneigs);
269       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
270 
271       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
272       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N));
273       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
274 
275       if (pcbddc->dbg_flag > 1) {
276         PetscInt ii;
277         for (ii=0;ii<B_neigs;ii++) {
278           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
279           for (j=0;j<B_N;j++) {
280             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]);
281           }
282         }
283       }
284       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
285       for (j=0;j<B_neigs;j++) {
286 #if 0
287         {
288           PetscBLASInt Blas_N,Blas_one = 1.0;
289           PetscScalar norm;
290           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
291           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
292           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
293             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
294           } else {
295             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
296           }
297           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
298         }
299 #endif
300         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
301         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
302         cum2 += subset_size;
303       }
304       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
305     }
306     /* shift for next computation */
307     cumarray += subset_size*subset_size;
308   }
309   if (pcbddc->dbg_flag) {
310     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
311   }
312   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
313   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
314 
315   if (mss) {
316     if (pcbddc->use_deluxe_scaling) {
317       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
318     } else {
319       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
320     }
321     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
322   }
323   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
324 #if defined(PETSC_USE_COMPLEX)
325   ierr = PetscFree(rwork);CHKERRQ(ierr);
326 #endif
327   if (pcbddc->dbg_flag) {
328     PetscInt maxneigs_r;
329     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
330     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
331   }
332   PetscFunctionReturn(0);
333 }
334 
335 #undef __FUNCT__
336 #define __FUNCT__ "PCBDDCSetUpSolvers"
337 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
338 {
339   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
340   PetscScalar    *coarse_submat_vals;
341   PetscErrorCode ierr;
342 
343   PetscFunctionBegin;
344   /* Setup local scatters R_to_B and (optionally) R_to_D */
345   /* PCBDDCSetUpLocalWorkVectors should be called first! */
346   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
347 
348   /* Setup local neumann solver ksp_R */
349   /* PCBDDCSetUpLocalScatters should be called first! */
350   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
351 
352   /* Change global null space passed in by the user if change of basis has been requested */
353   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
354     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
355   }
356 
357   /*
358      Setup local correction and local part of coarse basis.
359      Gives back the dense local part of the coarse matrix in column major ordering
360   */
361   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
362 
363   /* Compute total number of coarse nodes and setup coarse solver */
364   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
365 
366   /* free */
367   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
368   PetscFunctionReturn(0);
369 }
370 
371 #undef __FUNCT__
372 #define __FUNCT__ "PCBDDCResetCustomization"
373 PetscErrorCode PCBDDCResetCustomization(PC pc)
374 {
375   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
376   PetscErrorCode ierr;
377 
378   PetscFunctionBegin;
379   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
380   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
381   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
383   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
384   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
385   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
386   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
387   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
388   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
389   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "PCBDDCResetTopography"
395 PetscErrorCode PCBDDCResetTopography(PC pc)
396 {
397   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
398   PetscErrorCode ierr;
399 
400   PetscFunctionBegin;
401   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
403   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
404   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
405   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "PCBDDCResetSolvers"
411 PetscErrorCode PCBDDCResetSolvers(PC pc)
412 {
413   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
414   PetscScalar    *array;
415   PetscErrorCode ierr;
416 
417   PetscFunctionBegin;
418   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
419   if (pcbddc->coarse_phi_B) {
420     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
421     ierr = PetscFree(array);CHKERRQ(ierr);
422   }
423   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
424   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
425   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
426   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
427   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
428   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
429   if (pcbddc->local_auxmat2) {
430     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
431     ierr = PetscFree(array);CHKERRQ(ierr);
432   }
433   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
434   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
436   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
437   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
440   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
443   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
444   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
446   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
447   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
448   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
454 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
455 {
456   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
457   PC_IS          *pcis = (PC_IS*)pc->data;
458   VecType        impVecType;
459   PetscInt       n_constraints,n_R,old_size;
460   PetscErrorCode ierr;
461 
462   PetscFunctionBegin;
463   if (!pcbddc->ConstraintMatrix) {
464     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
465   }
466   /* get sizes */
467   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
468   n_R = pcis->n-pcbddc->n_actual_vertices;
469   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
470   /* local work vectors (try to avoid unneeded work)*/
471   /* R nodes */
472   old_size = -1;
473   if (pcbddc->vec1_R) {
474     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
475   }
476   if (n_R != old_size) {
477     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
478     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
479     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
480     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
481     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
482     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
483   }
484   /* local primal dofs */
485   old_size = -1;
486   if (pcbddc->vec1_P) {
487     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
488   }
489   if (pcbddc->local_primal_size != old_size) {
490     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
491     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
492     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
493     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
494   }
495   /* local explicit constraints */
496   old_size = -1;
497   if (pcbddc->vec1_C) {
498     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
499   }
500   if (n_constraints && n_constraints != old_size) {
501     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
502     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
503     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
504     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
505   }
506   PetscFunctionReturn(0);
507 }
508 
509 #undef __FUNCT__
510 #define __FUNCT__ "PCBDDCSetUpCorrection"
511 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
512 {
513   PetscErrorCode         ierr;
514   /* pointers to pcis and pcbddc */
515   PC_IS*                 pcis = (PC_IS*)pc->data;
516   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
517   /* submatrices of local problem */
518   Mat                    A_RV,A_VR,A_VV;
519   /* submatrices of local coarse problem */
520   Mat                    S_VV,S_CV,S_VC,S_CC;
521   /* working matrices */
522   Mat                    C_CR;
523   /* additional working stuff */
524   PC                     pc_R;
525   Mat                    F;
526   PetscBool              isLU,isCHOL,isILU;
527 
528   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
529   PetscScalar            *work;
530   PetscInt               *idx_V_B;
531   PetscInt               n,n_vertices,n_constraints;
532   PetscInt               i,n_R,n_D,n_B;
533   PetscBool              unsymmetric_check;
534   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
535   MatType                impMatType;
536   /* some shortcuts to scalars */
537   PetscScalar            one=1.0,m_one=-1.0;
538 
539   PetscFunctionBegin;
540   /* get number of vertices (corners plus constraints with change of basis)
541      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
542   n_vertices = pcbddc->n_actual_vertices;
543   n_constraints = pcbddc->local_primal_size-n_vertices;
544   /* Set Non-overlapping dimensions */
545   n_B = pcis->n_B; n_D = pcis->n - n_B;
546   n_R = pcis->n-n_vertices;
547 
548   /* Set types for local objects needed by BDDC precondtioner */
549   impMatType = MATSEQDENSE;
550 
551   /* vertices in boundary numbering */
552   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
553   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
554   if (i != n_vertices) {
555     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
556   }
557 
558   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
559   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
560   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
561   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
562   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
563   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
564   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
565   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
566   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
567   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
568 
569   unsymmetric_check = PETSC_FALSE;
570   /* allocate workspace */
571   n = 0;
572   if (n_constraints) {
573     n += n_R*n_constraints;
574   }
575   if (n_vertices) {
576     n = PetscMax(2*n_R*n_vertices,n);
577   }
578   if (!pcbddc->issym) {
579     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
580     unsymmetric_check = PETSC_TRUE;
581   }
582   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
583 
584   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
585   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
586   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
587   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
588   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
589   if (isLU || isILU || isCHOL) {
590     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
591   } else {
592     F = NULL;
593   }
594 
595   /* Precompute stuffs needed for preprocessing and application of BDDC*/
596   if (n_constraints) {
597     Mat M1,M2,M3;
598     IS  is_aux;
599     /* see if we can save some allocations */
600     if (pcbddc->local_auxmat2) {
601       PetscInt on_R,on_constraints;
602       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
603       if (on_R != n_R || on_constraints != n_constraints) {
604         PetscScalar *marray;
605 
606         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
607         ierr = PetscFree(marray);CHKERRQ(ierr);
608         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
609         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
610       }
611     }
612     /* auxiliary matrices */
613     if (!pcbddc->local_auxmat2) {
614       PetscScalar *marray;
615 
616       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
617       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
618       marray += n_R*n_constraints;
619       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
620     }
621 
622     /* Extract constraints on R nodes: C_{CR}  */
623     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
624     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
625     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
626 
627     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
628     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
629     for (i=0;i<n_constraints;i++) {
630       const PetscScalar *row_cmat_values;
631       const PetscInt    *row_cmat_indices;
632       PetscInt          size_of_constraint,j;
633 
634       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
635       for (j=0;j<size_of_constraint;j++) {
636         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
637       }
638       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
639     }
640     if (F) {
641       Mat B;
642 
643       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
644       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
645       ierr = MatDestroy(&B);CHKERRQ(ierr);
646     } else {
647       PetscScalar *xarray;
648       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
649       for (i=0;i<n_constraints;i++) {
650         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
651         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
652         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
653         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
654         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
655       }
656       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
657     }
658 
659     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
660     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
661     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
662     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
663     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
664     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
665     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
666     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
667     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
668     ierr = MatDestroy(&M2);CHKERRQ(ierr);
669     ierr = MatDestroy(&M3);CHKERRQ(ierr);
670     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
671     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
672     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
673     ierr = MatDestroy(&M1);CHKERRQ(ierr);
674   }
675   /* Get submatrices from subdomain matrix */
676   if (n_vertices) {
677     Mat       newmat;
678     IS        is_aux;
679     PetscInt  ibs,mbs;
680     PetscBool issbaij;
681 
682     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
683     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
684     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
685     if (ibs != mbs) { /* need to convert to SEQAIJ */
686       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
687       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
688       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
689       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
690       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
691     } else {
692       /* this is safe */
693       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
694       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
695       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
696         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
697         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
698         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
699         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
700         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
701       } else {
702         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
703         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
704       }
705     }
706     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
707   }
708 
709   /* Matrix of coarse basis functions (local) */
710   if (pcbddc->coarse_phi_B) {
711     PetscInt on_B,on_primal,on_D=n_D;
712     if (pcbddc->coarse_phi_D) {
713       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
714     }
715     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
716     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
717       PetscScalar *marray;
718 
719       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
720       ierr = PetscFree(marray);CHKERRQ(ierr);
721       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
722       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
723       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
724       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
725     }
726   }
727 
728   if (!pcbddc->coarse_phi_B) {
729     PetscScalar *marray;
730 
731     n = n_B*pcbddc->local_primal_size;
732     if (pcbddc->switch_static || pcbddc->dbg_flag) {
733       n += n_D*pcbddc->local_primal_size;
734     }
735     if (!pcbddc->issym) {
736       n *= 2;
737     }
738     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
739     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
740     n = n_B*pcbddc->local_primal_size;
741     if (pcbddc->switch_static || pcbddc->dbg_flag) {
742       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
743       n += n_D*pcbddc->local_primal_size;
744     }
745     if (!pcbddc->issym) {
746       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
747       if (pcbddc->switch_static || pcbddc->dbg_flag) {
748         n = n_B*pcbddc->local_primal_size;
749         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
750       }
751     } else {
752       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
753       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
754       if (pcbddc->switch_static || pcbddc->dbg_flag) {
755         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
756         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
757       }
758     }
759   }
760   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
761   /* vertices */
762   if (n_vertices) {
763 
764     if (n_R) {
765       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
766       PetscBLASInt B_N,B_one = 1;
767       PetscScalar  *x,*y;
768 
769       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
770       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
771       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
772       if (F) {
773         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
774       } else {
775         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
776         for (i=0;i<n_vertices;i++) {
777           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
778           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
779           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
780           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
781           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
782         }
783         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
784       }
785       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
786       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
787       if (n_constraints) {
788         Mat B;
789         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
790         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
791         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
792         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
793         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
794         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
795         ierr = MatDestroy(&B);CHKERRQ(ierr);
796       }
797       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
798       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
799       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
800       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
801       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
802       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
803       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
804       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
805       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
806       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
807       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
808       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
809     } else {
810       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
811       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
812     }
813     /* coarse basis functions */
814     for (i=0;i<n_vertices;i++) {
815       PetscScalar *y;
816 
817       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
818       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
819       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
820       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
822       y[n_B*i+idx_V_B[i]] = 1.0;
823       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
824       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
825 
826       if (pcbddc->switch_static || pcbddc->dbg_flag) {
827         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
828         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
829         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
831         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
832         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
833       }
834       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
835     }
836     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
837     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
838   }
839 
840   if (n_constraints) {
841     Mat B;
842 
843     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
844     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
845     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
846     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
847     if (n_vertices) {
848       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
849     }
850     ierr = MatDestroy(&B);CHKERRQ(ierr);
851     /* coarse basis functions */
852     for (i=0;i<n_constraints;i++) {
853       PetscScalar *y;
854 
855       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
856       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
857       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
858       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
860       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
861       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
862 
863       if (pcbddc->switch_static || pcbddc->dbg_flag) {
864         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
865         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
866         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
869         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
870       }
871       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
872     }
873   }
874 
875   /* compute other basis functions for non-symmetric problems */
876   if (!pcbddc->issym) {
877     Mat B,X;
878 
879     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
880 
881     if (n_constraints) {
882       Mat S_CCT,B_C;
883 
884       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
885       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
886       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
887       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
888       if (n_vertices) {
889         Mat B_V,S_VCT;
890 
891         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
892         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
893         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
894         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
895         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
896       }
897       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
898     }
899     if (n_vertices && n_R) {
900       Mat          A_VRT;
901       PetscBLASInt B_N,B_one = 1;
902 
903       if (!n_constraints) { /* if there are no constraints, reset work */
904         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
905       }
906       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
907       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
908       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
909       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
910       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
911     }
912 
913     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
914     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
915       for (i=0;i<pcbddc->local_primal_size;i++) {
916         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
917         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
918         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
919         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
920         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
921       }
922     } else {
923       for (i=0;i<pcbddc->local_primal_size;i++) {
924         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
925         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
926         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
927         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
928         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
929       }
930     }
931     ierr = MatDestroy(&B);CHKERRQ(ierr);
932     /* coarse basis functions */
933     for (i=0;i<pcbddc->local_primal_size;i++) {
934       PetscScalar *y;
935 
936       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
937       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
938       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
939       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941       if (i<n_vertices) {
942         y[n_B*i+idx_V_B[i]] = 1.0;
943       }
944       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
945       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
946 
947       if (pcbddc->switch_static || pcbddc->dbg_flag) {
948         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
949         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
950         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
952         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
953         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
954       }
955       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
956     }
957     ierr = MatDestroy(&X);CHKERRQ(ierr);
958   }
959 
960   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
961   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
962   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
963   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
964   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
965   /* Checking coarse_sub_mat and coarse basis functios */
966   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
967   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
968   if (pcbddc->dbg_flag) {
969     Mat         coarse_sub_mat;
970     Mat         AUXMAT,TM1,TM2,TM3,TM4;
971     Mat         coarse_phi_D,coarse_phi_B;
972     Mat         coarse_psi_D,coarse_psi_B;
973     Mat         A_II,A_BB,A_IB,A_BI;
974     MatType     checkmattype=MATSEQAIJ;
975     PetscReal   real_value;
976 
977     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
978     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
979     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
980     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
981     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
982     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
983     if (unsymmetric_check) {
984       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
985       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
986     }
987     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
988 
989     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
990     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
991     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
992     if (unsymmetric_check) {
993       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
994       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
995       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
996       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
997       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
998       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
999       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1000       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1001       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1002       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1003       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1004       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1005     } else {
1006       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1007       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1008       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1009       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1010       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1011       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1012       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1013       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1014     }
1015     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1016     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1017     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1018     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1019     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1020     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
1021 
1022     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1023     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1024     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1025     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1026     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1027     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1028     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1029     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1030     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1031     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1032     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1033     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1034     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1035     if (unsymmetric_check) {
1036       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1037       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1038     }
1039     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1040   }
1041 
1042   /* free memory */
1043   ierr = PetscFree(work);CHKERRQ(ierr);
1044   if (n_vertices) {
1045     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1046   }
1047   if (n_constraints) {
1048     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1049   }
1050   /* get back data */
1051   *coarse_submat_vals_n = coarse_submat_vals;
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 #undef __FUNCT__
1056 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1057 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1058 {
1059   Mat            *work_mat;
1060   IS             isrow_s,iscol_s;
1061   PetscBool      rsorted,csorted;
1062   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1063   PetscErrorCode ierr;
1064 
1065   PetscFunctionBegin;
1066   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1067   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1068   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1069   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1070 
1071   if (!rsorted) {
1072     const PetscInt *idxs;
1073     PetscInt *idxs_sorted,i;
1074 
1075     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1076     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1077     for (i=0;i<rsize;i++) {
1078       idxs_perm_r[i] = i;
1079     }
1080     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1081     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1082     for (i=0;i<rsize;i++) {
1083       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1084     }
1085     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1086     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1087   } else {
1088     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1089     isrow_s = isrow;
1090   }
1091 
1092   if (!csorted) {
1093     if (isrow == iscol) {
1094       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1095       iscol_s = isrow_s;
1096     } else {
1097       const PetscInt *idxs;
1098       PetscInt *idxs_sorted,i;
1099 
1100       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1101       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1102       for (i=0;i<csize;i++) {
1103         idxs_perm_c[i] = i;
1104       }
1105       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1106       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1107       for (i=0;i<csize;i++) {
1108         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1109       }
1110       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1111       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1112     }
1113   } else {
1114     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1115     iscol_s = iscol;
1116   }
1117 
1118   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1119 
1120   if (!rsorted || !csorted) {
1121     Mat      new_mat;
1122     IS       is_perm_r,is_perm_c;
1123 
1124     if (!rsorted) {
1125       PetscInt *idxs_r,i;
1126       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1127       for (i=0;i<rsize;i++) {
1128         idxs_r[idxs_perm_r[i]] = i;
1129       }
1130       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1131       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1132     } else {
1133       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1134     }
1135     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1136 
1137     if (!csorted) {
1138       if (isrow_s == iscol_s) {
1139         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1140         is_perm_c = is_perm_r;
1141       } else {
1142         PetscInt *idxs_c,i;
1143         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1144         for (i=0;i<csize;i++) {
1145           idxs_c[idxs_perm_c[i]] = i;
1146         }
1147         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1148         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1149       }
1150     } else {
1151       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1152     }
1153     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1154 
1155     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1156     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1157     work_mat[0] = new_mat;
1158     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1159     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1160   }
1161 
1162   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1163   *B = work_mat[0];
1164   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1165   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1166   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1172 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1173 {
1174   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1175   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1176   Mat            new_mat;
1177   IS             is_local,is_global;
1178   PetscInt       local_size;
1179   PetscBool      isseqaij;
1180   PetscErrorCode ierr;
1181 
1182   PetscFunctionBegin;
1183   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1184   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1185   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1186   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1187   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1188   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1189   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1190 
1191   /* check */
1192   if (pcbddc->dbg_flag) {
1193     Vec       x,x_change;
1194     PetscReal error;
1195 
1196     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1197     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1198     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1199     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1200     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1201     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1202     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1203     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1204     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1205     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1206     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1207     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1208     ierr = VecDestroy(&x);CHKERRQ(ierr);
1209     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1210   }
1211 
1212   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1213   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1214   if (isseqaij) {
1215     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1216   } else {
1217     Mat work_mat;
1218     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1219     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1220     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1221   }
1222   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1223 #if !defined(PETSC_USE_COMPLEX)
1224   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1225 #endif
1226   /*
1227   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1228   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1229   */
1230   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1236 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1237 {
1238   PC_IS*         pcis = (PC_IS*)(pc->data);
1239   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1240   IS             is_aux1,is_aux2;
1241   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1242   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1243   PetscInt       vbs,bs;
1244   PetscBT        bitmask;
1245   PetscErrorCode ierr;
1246 
1247   PetscFunctionBegin;
1248   /*
1249     No need to setup local scatters if
1250       - primal space is unchanged
1251         AND
1252       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1253         AND
1254       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1255   */
1256   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1257     PetscFunctionReturn(0);
1258   }
1259   /* destroy old objects */
1260   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1261   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1262   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1263   /* Set Non-overlapping dimensions */
1264   n_B = pcis->n_B; n_D = pcis->n - n_B;
1265   n_vertices = pcbddc->n_actual_vertices;
1266   /* create auxiliary bitmask */
1267   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1268   for (i=0;i<n_vertices;i++) {
1269     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1270   }
1271 
1272   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1273   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1274   for (i=0, n_R=0; i<pcis->n; i++) {
1275     if (!PetscBTLookup(bitmask,i)) {
1276       idx_R_local[n_R] = i;
1277       n_R++;
1278     }
1279   }
1280 
1281   /* Block code */
1282   vbs = 1;
1283   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1284   if (bs>1 && !(n_vertices%bs)) {
1285     PetscBool is_blocked = PETSC_TRUE;
1286     PetscInt  *vary;
1287     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1288     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1289     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1290     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1291     for (i=0; i<n_vertices/bs; i++) {
1292       if (vary[i]!=0 && vary[i]!=bs) {
1293         is_blocked = PETSC_FALSE;
1294         break;
1295       }
1296     }
1297     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1298       vbs = bs;
1299       for (i=0;i<n_R/vbs;i++) {
1300         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1301       }
1302     }
1303     ierr = PetscFree(vary);CHKERRQ(ierr);
1304   }
1305   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1306   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1307 
1308   /* print some info if requested */
1309   if (pcbddc->dbg_flag) {
1310     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1311     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1312     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1313     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1314     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1315     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1316     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1317     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1318   }
1319 
1320   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1321   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1322   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1323   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1324   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1325   for (i=0; i<n_D; i++) {
1326     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1327   }
1328   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1329   for (i=0, j=0; i<n_R; i++) {
1330     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1331       aux_array1[j++] = i;
1332     }
1333   }
1334   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1335   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1336   for (i=0, j=0; i<n_B; i++) {
1337     if (!PetscBTLookup(bitmask,is_indices[i])) {
1338       aux_array2[j++] = i;
1339     }
1340   }
1341   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1342   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1343   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1344   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1345   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1346 
1347   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1348     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1349     for (i=0, j=0; i<n_R; i++) {
1350       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1351         aux_array1[j++] = i;
1352       }
1353     }
1354     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1355     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1356     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1357   }
1358   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1359   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1366 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1367 {
1368   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1369   PC_IS          *pcis = (PC_IS*)pc->data;
1370   PC             pc_temp;
1371   Mat            A_RR;
1372   MatReuse       reuse;
1373   PetscScalar    m_one = -1.0;
1374   PetscReal      value;
1375   PetscInt       n_D,n_R,ibs,mbs;
1376   PetscBool      use_exact,use_exact_reduced,issbaij;
1377   PetscErrorCode ierr;
1378   /* prefixes stuff */
1379   char           dir_prefix[256],neu_prefix[256],str_level[16];
1380   size_t         len;
1381 
1382   PetscFunctionBegin;
1383 
1384   /* compute prefixes */
1385   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1386   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1387   if (!pcbddc->current_level) {
1388     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1389     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1390     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1391     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1392   } else {
1393     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1394     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1395     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1396     len -= 15; /* remove "pc_bddc_coarse_" */
1397     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1398     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1399     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1400     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1401     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1402     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1403     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1404     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1405   }
1406 
1407   /* DIRICHLET PROBLEM */
1408   if (dirichlet) {
1409     if (pcbddc->issym) {
1410       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1411     }
1412     /* Matrix for Dirichlet problem is pcis->A_II */
1413     n_D = pcis->n - pcis->n_B;
1414     if (!pcbddc->ksp_D) { /* create object if not yet build */
1415       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1416       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1417       /* default */
1418       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1419       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1420       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1421       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1422       if (issbaij) {
1423         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1424       } else {
1425         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1426       }
1427       /* Allow user's customization */
1428       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1429       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1430     }
1431     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1432     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1433     if (!n_D) {
1434       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1435       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1436     }
1437     /* Set Up KSP for Dirichlet problem of BDDC */
1438     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1439     /* set ksp_D into pcis data */
1440     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1441     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1442     pcis->ksp_D = pcbddc->ksp_D;
1443   }
1444 
1445   /* NEUMANN PROBLEM */
1446   A_RR = 0;
1447   if (neumann) {
1448     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1449     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1450     if (pcbddc->ksp_R) { /* already created ksp */
1451       PetscInt nn_R;
1452       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1453       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1454       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1455       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1456         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1457         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1458         reuse = MAT_INITIAL_MATRIX;
1459       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1460         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1461           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1462           reuse = MAT_INITIAL_MATRIX;
1463         } else { /* safe to reuse the matrix */
1464           reuse = MAT_REUSE_MATRIX;
1465         }
1466       }
1467       /* last check */
1468       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1469         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1470         reuse = MAT_INITIAL_MATRIX;
1471       }
1472     } else { /* first time, so we need to create the matrix */
1473       reuse = MAT_INITIAL_MATRIX;
1474     }
1475     /* extract A_RR */
1476     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1477     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1478     if (ibs != mbs) {
1479       Mat newmat;
1480       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1481       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1482       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1483     } else {
1484       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1485     }
1486     if (pcbddc->issym) {
1487       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1488     }
1489     if (!pcbddc->ksp_R) { /* create object if not present */
1490       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1491       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1492       /* default */
1493       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1494       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1495       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1496       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1497       if (issbaij) {
1498         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1499       } else {
1500         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1501       }
1502       /* Allow user's customization */
1503       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1504       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1505     }
1506     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1507     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1508     if (!n_R) {
1509       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1510       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1511     }
1512     /* Set Up KSP for Neumann problem of BDDC */
1513     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1514   }
1515 
1516   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1517   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1518     if (pcbddc->dbg_flag) {
1519       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1520       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1521       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1522     }
1523     if (dirichlet) { /* Dirichlet */
1524       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1525       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1526       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1527       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1528       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1529       /* need to be adapted? */
1530       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1531       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1532       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1533       /* print info */
1534       if (pcbddc->dbg_flag) {
1535         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1536         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1537       }
1538       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1539         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1540       }
1541     }
1542     if (neumann) { /* Neumann */
1543       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1544       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1545       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1546       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1547       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1548       /* need to be adapted? */
1549       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1550       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1551       /* print info */
1552       if (pcbddc->dbg_flag) {
1553         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1554         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1555       }
1556       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1557         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1558       }
1559     }
1560   }
1561   /* free Neumann problem's matrix */
1562   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1568 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1569 {
1570   PetscErrorCode ierr;
1571   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1572 
1573   PetscFunctionBegin;
1574   if (applytranspose) {
1575     if (pcbddc->local_auxmat1) {
1576       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1577       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1578     }
1579     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1580   } else {
1581     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1582     if (pcbddc->local_auxmat1) {
1583       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1584       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1585     }
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1591 #undef __FUNCT__
1592 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1593 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1594 {
1595   PetscErrorCode ierr;
1596   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1597   PC_IS*            pcis = (PC_IS*)  (pc->data);
1598   const PetscScalar zero = 0.0;
1599 
1600   PetscFunctionBegin;
1601   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1602   if (applytranspose) {
1603     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1604     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1605   } else {
1606     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1607     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1608   }
1609   /* start communications from local primal nodes to rhs of coarse solver */
1610   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1611   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1612   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613 
1614   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1615   /* TODO remove null space when doing multilevel */
1616   if (pcbddc->coarse_ksp) {
1617     Vec rhs,sol;
1618 
1619     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1620     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1621     if (applytranspose) {
1622       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1623     } else {
1624       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1625     }
1626   }
1627 
1628   /* Local solution on R nodes */
1629   if (pcis->n) {
1630     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1631     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1632     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1633     if (pcbddc->switch_static) {
1634       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1635       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1636     }
1637     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1638     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1639     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1640     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641     if (pcbddc->switch_static) {
1642       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1643       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644     }
1645   }
1646 
1647   /* communications from coarse sol to local primal nodes */
1648   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1649   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1650 
1651   /* Sum contributions from two levels */
1652   if (applytranspose) {
1653     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1654     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1655   } else {
1656     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1657     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1658   }
1659   PetscFunctionReturn(0);
1660 }
1661 
1662 #undef __FUNCT__
1663 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1664 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1665 {
1666   PetscErrorCode ierr;
1667   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1668   PetscScalar    *array;
1669   Vec            from,to;
1670 
1671   PetscFunctionBegin;
1672   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1673     from = pcbddc->coarse_vec;
1674     to = pcbddc->vec1_P;
1675     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1676       Vec tvec;
1677 
1678       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1679       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1680       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1681       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1682       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1683       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1684     }
1685   } else { /* from local to global -> put data in coarse right hand side */
1686     from = pcbddc->vec1_P;
1687     to = pcbddc->coarse_vec;
1688   }
1689   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1690   PetscFunctionReturn(0);
1691 }
1692 
1693 #undef __FUNCT__
1694 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1695 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1696 {
1697   PetscErrorCode ierr;
1698   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1699   PetscScalar    *array;
1700   Vec            from,to;
1701 
1702   PetscFunctionBegin;
1703   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1704     from = pcbddc->coarse_vec;
1705     to = pcbddc->vec1_P;
1706   } else { /* from local to global -> put data in coarse right hand side */
1707     from = pcbddc->vec1_P;
1708     to = pcbddc->coarse_vec;
1709   }
1710   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1711   if (smode == SCATTER_FORWARD) {
1712     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1713       Vec tvec;
1714 
1715       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1716       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1717       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1718       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1719     }
1720   } else {
1721     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1722      ierr = VecResetArray(from);CHKERRQ(ierr);
1723     }
1724   }
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 /* uncomment for testing purposes */
1729 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1730 #undef __FUNCT__
1731 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1732 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1733 {
1734   PetscErrorCode    ierr;
1735   PC_IS*            pcis = (PC_IS*)(pc->data);
1736   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1737   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1738   /* one and zero */
1739   PetscScalar       one=1.0,zero=0.0;
1740   /* space to store constraints and their local indices */
1741   PetscScalar       *temp_quadrature_constraint;
1742   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1743   /* iterators */
1744   PetscInt          i,j,k,total_counts,temp_start_ptr;
1745   /* BLAS integers */
1746   PetscBLASInt      lwork,lierr;
1747   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1748   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1749   /* reuse */
1750   PetscInt          olocal_primal_size;
1751   PetscInt          *oprimal_indices_local_idxs;
1752   /* change of basis */
1753   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1754   PetscBool         boolforchange,qr_needed;
1755   PetscBT           touched,change_basis,qr_needed_idx;
1756   /* auxiliary stuff */
1757   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1758   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1759   PetscScalar       *temp_quadrature_constraint_work=NULL;
1760   /* some quantities */
1761   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1762   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1763 
1764   PetscFunctionBegin;
1765   /* Destroy Mat objects computed previously */
1766   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1767   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1768 
1769   /* print some info */
1770   if (pcbddc->dbg_flag) {
1771     IS       vertices;
1772     PetscInt nv,nedges,nfaces;
1773     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1774     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1775     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1776     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1777     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1778     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1779     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1780     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1781     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1782   }
1783 
1784   if (!pcbddc->adaptive_selection) {
1785     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1786     MatNullSpace nearnullsp;
1787     const Vec    *nearnullvecs;
1788     Vec          *localnearnullsp;
1789     PetscScalar  *array;
1790     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1791     PetscBool    nnsp_has_cnst;
1792     /* LAPACK working arrays for SVD or POD */
1793     PetscBool    skip_lapack;
1794     PetscScalar  *work;
1795     PetscReal    *singular_vals;
1796 #if defined(PETSC_USE_COMPLEX)
1797     PetscReal    *rwork;
1798 #endif
1799 #if defined(PETSC_MISSING_LAPACK_GESVD)
1800     PetscScalar  *temp_basis,*correlation_mat;
1801 #else
1802     PetscBLASInt dummy_int=1;
1803     PetscScalar  dummy_scalar=1.;
1804 #endif
1805 
1806     /* Get index sets for faces, edges and vertices from graph */
1807     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1808     /* free unneeded index sets */
1809     if (!pcbddc->use_vertices) {
1810       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1811     }
1812     if (!pcbddc->use_edges) {
1813       for (i=0;i<n_ISForEdges;i++) {
1814         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1815       }
1816       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1817       n_ISForEdges = 0;
1818     }
1819     if (!pcbddc->use_faces) {
1820       for (i=0;i<n_ISForFaces;i++) {
1821         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1822       }
1823       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1824       n_ISForFaces = 0;
1825     }
1826     /* HACKS (the following two blocks of code) */
1827     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1828       pcbddc->use_change_of_basis = PETSC_TRUE;
1829       if (!ISForEdges) {
1830         pcbddc->use_change_on_faces = PETSC_TRUE;
1831       }
1832     }
1833     if (pcbddc->NullSpace) {
1834       /* use_change_of_basis should be consistent among processors */
1835       PetscBool tbool[2],gbool[2];
1836       tbool [0] = pcbddc->use_change_of_basis;
1837       tbool [1] = pcbddc->use_change_on_faces;
1838       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1839       pcbddc->use_change_of_basis = gbool[0];
1840       pcbddc->use_change_on_faces = gbool[1];
1841     }
1842 
1843     /* check if near null space is attached to global mat */
1844     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1845     if (nearnullsp) {
1846       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1847       /* remove any stored info */
1848       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1849       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1850       /* store information for BDDC solver reuse */
1851       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1852       pcbddc->onearnullspace = nearnullsp;
1853       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1854       for (i=0;i<nnsp_size;i++) {
1855         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1856       }
1857     } else { /* if near null space is not provided BDDC uses constants by default */
1858       nnsp_size = 0;
1859       nnsp_has_cnst = PETSC_TRUE;
1860     }
1861     /* get max number of constraints on a single cc */
1862     max_constraints = nnsp_size;
1863     if (nnsp_has_cnst) max_constraints++;
1864 
1865     /*
1866          Evaluate maximum storage size needed by the procedure
1867          - temp_indices will contain start index of each constraint stored as follows
1868          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1869          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1870          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1871                                                                                                                                                            */
1872     total_counts = n_ISForFaces+n_ISForEdges;
1873     total_counts *= max_constraints;
1874     n_vertices = 0;
1875     if (ISForVertices) {
1876       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1877     }
1878     total_counts += n_vertices;
1879     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1880     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1881     total_counts = 0;
1882     max_size_of_constraint = 0;
1883     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1884       if (i<n_ISForEdges) {
1885         used_IS = &ISForEdges[i];
1886       } else {
1887         used_IS = &ISForFaces[i-n_ISForEdges];
1888       }
1889       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1890       total_counts += j;
1891       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1892     }
1893     total_counts *= max_constraints;
1894     total_counts += n_vertices;
1895     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1896     /* get local part of global near null space vectors */
1897     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1898     for (k=0;k<nnsp_size;k++) {
1899       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1900       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1901       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1902     }
1903 
1904     /* whether or not to skip lapack calls */
1905     skip_lapack = PETSC_TRUE;
1906     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1907 
1908     /* allocate some auxiliary stuff */
1909     if (!skip_lapack || pcbddc->use_qr_single) {
1910       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1911     } else {
1912       gidxs = NULL;
1913       permutation = NULL;
1914       temp_indices_to_constraint_work = NULL;
1915       temp_quadrature_constraint_work = NULL;
1916     }
1917 
1918     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1919     if (!skip_lapack) {
1920       PetscScalar temp_work;
1921 
1922 #if defined(PETSC_MISSING_LAPACK_GESVD)
1923       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1924       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1925       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1926       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1927 #if defined(PETSC_USE_COMPLEX)
1928       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1929 #endif
1930       /* now we evaluate the optimal workspace using query with lwork=-1 */
1931       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1932       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1933       lwork = -1;
1934       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1935 #if !defined(PETSC_USE_COMPLEX)
1936       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1937 #else
1938       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1939 #endif
1940       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1941       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1942 #else /* on missing GESVD */
1943       /* SVD */
1944       PetscInt max_n,min_n;
1945       max_n = max_size_of_constraint;
1946       min_n = max_constraints;
1947       if (max_size_of_constraint < max_constraints) {
1948         min_n = max_size_of_constraint;
1949         max_n = max_constraints;
1950       }
1951       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1952 #if defined(PETSC_USE_COMPLEX)
1953       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1954 #endif
1955       /* now we evaluate the optimal workspace using query with lwork=-1 */
1956       lwork = -1;
1957       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1958       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1959       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1960       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1961 #if !defined(PETSC_USE_COMPLEX)
1962       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1963 #else
1964       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1965 #endif
1966       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1967       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1968 #endif /* on missing GESVD */
1969       /* Allocate optimal workspace */
1970       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1971       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1972     }
1973     /* Now we can loop on constraining sets */
1974     total_counts = 0;
1975     temp_indices[0] = 0;
1976     /* vertices */
1977     if (ISForVertices) {
1978       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1979       if (nnsp_has_cnst) { /* consider all vertices */
1980         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1981         for (i=0;i<n_vertices;i++) {
1982           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1983           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1984           total_counts++;
1985         }
1986       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1987         PetscBool used_vertex;
1988         for (i=0;i<n_vertices;i++) {
1989           used_vertex = PETSC_FALSE;
1990           k = 0;
1991           while (!used_vertex && k<nnsp_size) {
1992             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1993             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1994               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1995               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1996               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1997               total_counts++;
1998               used_vertex = PETSC_TRUE;
1999             }
2000             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2001             k++;
2002           }
2003         }
2004       }
2005       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2006       n_vertices = total_counts;
2007     }
2008 
2009     /* edges and faces */
2010     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2011       if (ncc<n_ISForEdges) {
2012         used_IS = &ISForEdges[ncc];
2013         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2014       } else {
2015         used_IS = &ISForFaces[ncc-n_ISForEdges];
2016         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2017       }
2018       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2019       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2020       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2021       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2022       /* change of basis should not be performed on local periodic nodes */
2023       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2024       if (nnsp_has_cnst) {
2025         PetscScalar quad_value;
2026         temp_constraints++;
2027         if (!pcbddc->use_nnsp_true) {
2028           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2029         } else {
2030           quad_value = 1.0;
2031         }
2032         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2033         for (j=0;j<size_of_constraint;j++) {
2034           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2035         }
2036         /* sort by global ordering if using lapack subroutines (not needed!) */
2037         if (!skip_lapack || pcbddc->use_qr_single) {
2038           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2039           for (j=0;j<size_of_constraint;j++) {
2040             permutation[j]=j;
2041           }
2042           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2043           for (j=0;j<size_of_constraint;j++) {
2044             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2045           }
2046           for (j=0;j<size_of_constraint;j++) {
2047             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2048             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2049           }
2050           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2051           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2052         }
2053         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2054         total_counts++;
2055       }
2056       for (k=0;k<nnsp_size;k++) {
2057         PetscReal real_value;
2058         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2059         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2060         for (j=0;j<size_of_constraint;j++) {
2061           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2062         }
2063         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2064         /* check if array is null on the connected component */
2065         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2066         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2067         if (real_value > 0.0) { /* keep indices and values */
2068           /* sort by global ordering if using lapack subroutines */
2069           if (!skip_lapack || pcbddc->use_qr_single) {
2070             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2071             for (j=0;j<size_of_constraint;j++) {
2072               permutation[j]=j;
2073             }
2074             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2075             for (j=0;j<size_of_constraint;j++) {
2076               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2077               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2078             }
2079             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2080             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2081           }
2082           temp_constraints++;
2083           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2084           total_counts++;
2085         }
2086       }
2087       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2088       valid_constraints = temp_constraints;
2089       if (!pcbddc->use_nnsp_true && temp_constraints) {
2090         if (temp_constraints == 1) { /* just normalize the constraint */
2091           PetscScalar norm;
2092           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2093           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2094           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2095           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2096         } else { /* perform SVD */
2097           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2098 
2099 #if defined(PETSC_MISSING_LAPACK_GESVD)
2100           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2101              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2102              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2103                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2104                 from that computed using LAPACKgesvd
2105              -> This is due to a different computation of eigenvectors in LAPACKheev
2106              -> The quality of the POD-computed basis will be the same */
2107           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2108           /* Store upper triangular part of correlation matrix */
2109           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2110           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2111           for (j=0;j<temp_constraints;j++) {
2112             for (k=0;k<j+1;k++) {
2113               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2114             }
2115           }
2116           /* compute eigenvalues and eigenvectors of correlation matrix */
2117           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2118           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2119 #if !defined(PETSC_USE_COMPLEX)
2120           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2121 #else
2122           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2123 #endif
2124           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2125           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2126           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2127           j = 0;
2128           while (j < temp_constraints && singular_vals[j] < tol) j++;
2129           total_counts = total_counts-j;
2130           valid_constraints = temp_constraints-j;
2131           /* scale and copy POD basis into used quadrature memory */
2132           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2133           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2134           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2135           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2136           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2137           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2138           if (j<temp_constraints) {
2139             PetscInt ii;
2140             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2141             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2142             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2143             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2144             for (k=0;k<temp_constraints-j;k++) {
2145               for (ii=0;ii<size_of_constraint;ii++) {
2146                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2147               }
2148             }
2149           }
2150 #else  /* on missing GESVD */
2151           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2152           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2153           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2154           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2155 #if !defined(PETSC_USE_COMPLEX)
2156           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2157 #else
2158           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2159 #endif
2160           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2161           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2162           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2163           k = temp_constraints;
2164           if (k > size_of_constraint) k = size_of_constraint;
2165           j = 0;
2166           while (j < k && singular_vals[k-j-1] < tol) j++;
2167           valid_constraints = k-j;
2168           total_counts = total_counts-temp_constraints+valid_constraints;
2169 #endif /* on missing GESVD */
2170         }
2171       }
2172       /* setting change_of_basis flag is safe now */
2173       if (boolforchange) {
2174         for (j=0;j<valid_constraints;j++) {
2175           PetscBTSet(change_basis,total_counts-j-1);
2176         }
2177       }
2178     }
2179     /* free workspace */
2180     if (!skip_lapack || pcbddc->use_qr_single) {
2181       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2182     }
2183     if (!skip_lapack) {
2184       ierr = PetscFree(work);CHKERRQ(ierr);
2185 #if defined(PETSC_USE_COMPLEX)
2186       ierr = PetscFree(rwork);CHKERRQ(ierr);
2187 #endif
2188       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2189 #if defined(PETSC_MISSING_LAPACK_GESVD)
2190       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2191       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2192 #endif
2193     }
2194     for (k=0;k<nnsp_size;k++) {
2195       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2196     }
2197     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2198     /* free index sets of faces, edges and vertices */
2199     for (i=0;i<n_ISForFaces;i++) {
2200       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2201     }
2202     if (n_ISForFaces) {
2203       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2204     }
2205     for (i=0;i<n_ISForEdges;i++) {
2206       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2207     }
2208     if (n_ISForEdges) {
2209       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2210     }
2211     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2212   } else {
2213     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2214     PetscInt        cum = 0;
2215 
2216     total_counts = 0;
2217     n_vertices = 0;
2218     if (sub_schurs->is_Ej_com) {
2219       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2220     }
2221     max_constraints = 0;
2222     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2223       total_counts += pcbddc->adaptive_constraints_n[i];
2224       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2225     }
2226     temp_indices = pcbddc->adaptive_constraints_ptrs;
2227     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2228     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2229 
2230 #if 0
2231     printf("Found %d totals\n",total_counts);
2232     for (i=0;i<total_counts;i++) {
2233       printf("const %d, start %d",i,temp_indices[i]);
2234       printf(" end %d:\n",temp_indices[i+1]);
2235       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2236         printf("  idxs %d",temp_indices_to_constraint[j]);
2237         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2238       }
2239     }
2240     for (i=0;i<n_vertices;i++) {
2241       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2242     }
2243     for (i=0;i<sub_schurs->n_subs;i++) {
2244       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2245     }
2246 #endif
2247 
2248     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2249     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2250     /* Change of basis */
2251     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2252     if (pcbddc->use_change_of_basis) {
2253       cum = n_vertices;
2254       for (i=0;i<sub_schurs->n_subs;i++) {
2255         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2256           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2257             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2258           }
2259         }
2260         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2261       }
2262     }
2263   }
2264 
2265   /* map temp_indices_to_constraint in boundary numbering */
2266   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2267   if (i != temp_indices[total_counts]) {
2268     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2269   }
2270 
2271   /* set quantities in pcbddc data structure and store previous primal size */
2272   /* n_vertices defines the number of subdomain corners in the primal space */
2273   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2274   olocal_primal_size = pcbddc->local_primal_size;
2275   pcbddc->local_primal_size = total_counts;
2276   pcbddc->n_vertices = n_vertices;
2277   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2278 
2279   /* Create constraint matrix */
2280   /* The constraint matrix is used to compute the l2g map of primal dofs */
2281   /* so we need to set it up properly either with or without change of basis */
2282   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2283   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2284   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2285   /* array to compute a local numbering of constraints : vertices first then constraints */
2286   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2287   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2288   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2289   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2290   /* auxiliary stuff for basis change */
2291   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2292   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2293 
2294   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2295   total_primal_vertices=0;
2296   for (i=0;i<pcbddc->local_primal_size;i++) {
2297     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2298     if (size_of_constraint == 1) {
2299       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2300       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2301       aux_primal_minloc[total_primal_vertices]=0;
2302       total_primal_vertices++;
2303     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2304       PetscInt min_loc,min_index;
2305       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2306       /* find first untouched local node */
2307       k = 0;
2308       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2309       min_index = global_indices[k];
2310       min_loc = k;
2311       /* search the minimum among global nodes already untouched on the cc */
2312       for (k=1;k<size_of_constraint;k++) {
2313         /* there can be more than one constraint on a single connected component */
2314         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2315           min_index = global_indices[k];
2316           min_loc = k;
2317         }
2318       }
2319       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2320       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2321       aux_primal_minloc[total_primal_vertices]=min_loc;
2322       total_primal_vertices++;
2323     }
2324   }
2325   /* determine if a QR strategy is needed for change of basis */
2326   qr_needed = PETSC_FALSE;
2327   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2328   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2329     if (PetscBTLookup(change_basis,i)) {
2330       if (!pcbddc->use_qr_single) {
2331         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2332         j = 0;
2333         for (k=0;k<size_of_constraint;k++) {
2334           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2335             j++;
2336           }
2337         }
2338         /* found more than one primal dof on the cc */
2339         if (j > 1) {
2340           PetscBTSet(qr_needed_idx,i);
2341           qr_needed = PETSC_TRUE;
2342         }
2343       } else {
2344         PetscBTSet(qr_needed_idx,i);
2345         qr_needed = PETSC_TRUE;
2346       }
2347     }
2348   }
2349   /* free workspace */
2350   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2351 
2352   /* permute indices in order to have a sorted set of vertices */
2353   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2354 
2355   /* nonzero structure of constraint matrix */
2356   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2357   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2358   j=total_primal_vertices;
2359   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2360     if (!PetscBTLookup(change_basis,i)) {
2361       nnz[j]=temp_indices[i+1]-temp_indices[i];
2362       j++;
2363     }
2364   }
2365   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2366   ierr = PetscFree(nnz);CHKERRQ(ierr);
2367   /* set values in constraint matrix */
2368   for (i=0;i<total_primal_vertices;i++) {
2369     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2370   }
2371   total_counts = total_primal_vertices;
2372   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2373     if (!PetscBTLookup(change_basis,i)) {
2374       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2375       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2376       total_counts++;
2377     }
2378   }
2379   /* assembling */
2380   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2381   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2382   /*
2383   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2384   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2385   */
2386   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2387   if (pcbddc->use_change_of_basis) {
2388     /* dual and primal dofs on a single cc */
2389     PetscInt     dual_dofs,primal_dofs;
2390     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2391     PetscInt     primal_counter;
2392     /* working stuff for GEQRF */
2393     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2394     PetscBLASInt lqr_work;
2395     /* working stuff for UNGQR */
2396     PetscScalar  *gqr_work,lgqr_work_t;
2397     PetscBLASInt lgqr_work;
2398     /* working stuff for TRTRS */
2399     PetscScalar  *trs_rhs;
2400     PetscBLASInt Blas_NRHS;
2401     /* pointers for values insertion into change of basis matrix */
2402     PetscInt     *start_rows,*start_cols;
2403     PetscScalar  *start_vals;
2404     /* working stuff for values insertion */
2405     PetscBT      is_primal;
2406     /* matrix sizes */
2407     PetscInt     global_size,local_size;
2408     /* temporary change of basis */
2409     Mat          localChangeOfBasisMatrix;
2410     /* extra space for debugging */
2411     PetscScalar  *dbg_work;
2412 
2413     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2414     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2415     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2416     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2417     /* nonzeros for local mat */
2418     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2419     for (i=0;i<pcis->n;i++) nnz[i]=1;
2420     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2421       if (PetscBTLookup(change_basis,i)) {
2422         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2423         if (PetscBTLookup(qr_needed_idx,i)) {
2424           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2425         } else {
2426           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2427           /* get local primal index on the cc */
2428           j = 0;
2429           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2430           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2431         }
2432       }
2433     }
2434     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2435     ierr = PetscFree(nnz);CHKERRQ(ierr);
2436     /* Set initial identity in the matrix */
2437     for (i=0;i<pcis->n;i++) {
2438       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2439     }
2440 
2441     if (pcbddc->dbg_flag) {
2442       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2443       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2444     }
2445 
2446 
2447     /* Now we loop on the constraints which need a change of basis */
2448     /*
2449        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2450        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2451 
2452        Basic blocks of change of basis matrix T computed by
2453 
2454           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2455 
2456             | 1        0   ...        0         s_1/S |
2457             | 0        1   ...        0         s_2/S |
2458             |              ...                        |
2459             | 0        ...            1     s_{n-1}/S |
2460             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2461 
2462             with S = \sum_{i=1}^n s_i^2
2463             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2464                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2465 
2466           - QR decomposition of constraints otherwise
2467     */
2468     if (qr_needed) {
2469       /* space to store Q */
2470       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2471       /* first we issue queries for optimal work */
2472       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2473       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2474       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2475       lqr_work = -1;
2476       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2477       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2478       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2479       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2480       lgqr_work = -1;
2481       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2482       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2483       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2484       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2485       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2486       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2487       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2488       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2489       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2490       /* array to store scaling factors for reflectors */
2491       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2492       /* array to store rhs and solution of triangular solver */
2493       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2494       /* allocating workspace for check */
2495       if (pcbddc->dbg_flag) {
2496         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2497       }
2498     }
2499     /* array to store whether a node is primal or not */
2500     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2501     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2502     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2503     if (i != total_primal_vertices) {
2504       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2505     }
2506     for (i=0;i<total_primal_vertices;i++) {
2507       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2508     }
2509     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2510 
2511     /* loop on constraints and see whether or not they need a change of basis and compute it */
2512     /* -> using implicit ordering contained in temp_indices data */
2513     total_counts = pcbddc->n_vertices;
2514     primal_counter = total_counts;
2515     while (total_counts<pcbddc->local_primal_size) {
2516       primal_dofs = 1;
2517       if (PetscBTLookup(change_basis,total_counts)) {
2518         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2519         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2520           primal_dofs++;
2521         }
2522         /* get constraint info */
2523         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2524         dual_dofs = size_of_constraint-primal_dofs;
2525 
2526         if (pcbddc->dbg_flag) {
2527           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2528         }
2529 
2530         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2531 
2532           /* copy quadrature constraints for change of basis check */
2533           if (pcbddc->dbg_flag) {
2534             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2535           }
2536           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2537           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2538 
2539           /* compute QR decomposition of constraints */
2540           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2541           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2542           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2543           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2544           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2545           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2546           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2547 
2548           /* explictly compute R^-T */
2549           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2550           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2551           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2552           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2553           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2554           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2555           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2556           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2557           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2558           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2559 
2560           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2561           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2562           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2563           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2564           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2565           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2566           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2567           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2568           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2569 
2570           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2571              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2572              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2573           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2574           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2575           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2576           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2577           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2578           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2579           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2580           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2581           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2582           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2583 
2584           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2585           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2586           /* insert cols for primal dofs */
2587           for (j=0;j<primal_dofs;j++) {
2588             start_vals = &qr_basis[j*size_of_constraint];
2589             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2590             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2591           }
2592           /* insert cols for dual dofs */
2593           for (j=0,k=0;j<dual_dofs;k++) {
2594             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2595               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2596               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2597               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2598               j++;
2599             }
2600           }
2601 
2602           /* check change of basis */
2603           if (pcbddc->dbg_flag) {
2604             PetscInt   ii,jj;
2605             PetscBool valid_qr=PETSC_TRUE;
2606             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2607             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2608             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2609             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2610             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2611             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2612             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2613             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2614             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2615             for (jj=0;jj<size_of_constraint;jj++) {
2616               for (ii=0;ii<primal_dofs;ii++) {
2617                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2618                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2619               }
2620             }
2621             if (!valid_qr) {
2622               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2623               for (jj=0;jj<size_of_constraint;jj++) {
2624                 for (ii=0;ii<primal_dofs;ii++) {
2625                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2626                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2627                   }
2628                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2629                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2630                   }
2631                 }
2632               }
2633             } else {
2634               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2635             }
2636           }
2637         } else { /* simple transformation block */
2638           PetscInt    row,col;
2639           PetscScalar val,norm;
2640 
2641           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2642           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2643           for (j=0;j<size_of_constraint;j++) {
2644             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2645             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2646             if (!PetscBTLookup(is_primal,row_B)) {
2647               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2648               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2649               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2650             } else {
2651               for (k=0;k<size_of_constraint;k++) {
2652                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2653                 if (row != col) {
2654                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2655                 } else {
2656                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2657                 }
2658                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2659               }
2660             }
2661           }
2662           if (pcbddc->dbg_flag) {
2663             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2664           }
2665         }
2666         /* increment primal counter */
2667         primal_counter += primal_dofs;
2668       } else {
2669         if (pcbddc->dbg_flag) {
2670           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2671         }
2672       }
2673       /* increment constraint counter total_counts */
2674       total_counts += primal_dofs;
2675     }
2676 
2677     /* free workspace */
2678     if (qr_needed) {
2679       if (pcbddc->dbg_flag) {
2680         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2681       }
2682       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2683       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2684       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2685       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2686       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2687     }
2688     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2689     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2690     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2691 
2692     /* assembling of global change of variable */
2693     {
2694       Mat      tmat;
2695       PetscInt bs;
2696 
2697       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2698       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2699       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2700       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2701       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2702       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2703       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2704       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2705       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2706       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2707       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2708       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2709       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2710       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2711       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2712       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2713       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2714       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2715     }
2716     /* check */
2717     if (pcbddc->dbg_flag) {
2718       PetscReal error;
2719       Vec       x,x_change;
2720 
2721       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2722       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2723       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2724       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2725       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2726       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2727       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2728       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2729       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2730       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2731       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2732       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2733       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2734       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2735       ierr = VecDestroy(&x);CHKERRQ(ierr);
2736       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2737     }
2738 
2739     /* adapt sub_schurs computed (if any) */
2740     if (pcbddc->use_deluxe_scaling) {
2741       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2742       if (sub_schurs->n_subs_par_g) {
2743         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2744       }
2745       if (sub_schurs->S_Ej_all) {
2746         Mat S_1,S_2,tmat;
2747         IS is_all_N;
2748 
2749         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2750         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2751         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2752         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2753         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2754         sub_schurs->S_Ej_all = S_1;
2755         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2756         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2757         sub_schurs->sum_S_Ej_all = S_2;
2758         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2759       }
2760     }
2761     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2762   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2763     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2764     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2765   }
2766 
2767   /* set up change of basis context */
2768   if (pcbddc->ChangeOfBasisMatrix) {
2769     PCBDDCChange_ctx change_ctx;
2770 
2771     if (!pcbddc->new_global_mat) {
2772       PetscInt global_size,local_size;
2773 
2774       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2775       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2776       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2777       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2778       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2779       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2780       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2781       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2782       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2783     } else {
2784       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2785       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2786       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2787     }
2788     if (!pcbddc->user_ChangeOfBasisMatrix) {
2789       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2790       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2791     } else {
2792       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2793       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2794     }
2795     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2796     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2797     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2798     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2799   }
2800 
2801   /* get indices in local ordering for vertices and constraints */
2802   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2803     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2804     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2805   }
2806   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2807   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2808   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2809   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2810   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2811   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2812   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2813   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2814   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2815   /* set quantities in PCBDDC data struct */
2816   pcbddc->n_actual_vertices = i;
2817   /* check if a new primal space has been introduced */
2818   pcbddc->new_primal_space_local = PETSC_TRUE;
2819   if (olocal_primal_size == pcbddc->local_primal_size) {
2820     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2821     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2822     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2823   }
2824   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2825   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2826 
2827   /* flush dbg viewer */
2828   if (pcbddc->dbg_flag) {
2829     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2830   }
2831 
2832   /* free workspace */
2833   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2834   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2835   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2836   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2837   if (!pcbddc->adaptive_selection) {
2838     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2839     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2840   } else {
2841     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2842                       pcbddc->adaptive_constraints_ptrs,
2843                       pcbddc->adaptive_constraints_idxs,
2844                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2845     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2846   }
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 #undef __FUNCT__
2851 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2852 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2853 {
2854   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2855   PC_IS       *pcis = (PC_IS*)pc->data;
2856   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2857   PetscInt    ierr,i,vertex_size;
2858   PetscViewer viewer=pcbddc->dbg_viewer;
2859 
2860   PetscFunctionBegin;
2861   /* Reset previously computed graph */
2862   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2863   /* Init local Graph struct */
2864   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2865 
2866   /* Check validity of the csr graph passed in by the user */
2867   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2868     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2869   }
2870 
2871   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2872   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2873     PetscInt  *xadj,*adjncy;
2874     PetscInt  nvtxs;
2875     PetscBool flg_row=PETSC_FALSE;
2876 
2877     if (pcbddc->use_local_adj) {
2878 
2879       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2880       if (flg_row) {
2881         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2882         pcbddc->computed_rowadj = PETSC_TRUE;
2883       }
2884       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2885     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2886       IS                     is_dummy;
2887       ISLocalToGlobalMapping l2gmap_dummy;
2888       PetscInt               j,sum;
2889       PetscInt               *cxadj,*cadjncy;
2890       const PetscInt         *idxs;
2891       PCBDDCGraph            graph;
2892       PetscBT                is_on_boundary;
2893 
2894       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2895       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2896       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2897       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2898       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2899       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2900       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2901       if (flg_row) {
2902         graph->xadj = xadj;
2903         graph->adjncy = adjncy;
2904       }
2905       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2906       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2907       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2908 
2909       if (pcbddc->dbg_flag) {
2910         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2911         for (i=0;i<graph->ncc;i++) {
2912           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2913         }
2914         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2915       }
2916 
2917       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2918       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2919       for (i=0;i<pcis->n_B;i++) {
2920         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2921       }
2922       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2923 
2924       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2925       sum = 0;
2926       for (i=0;i<graph->ncc;i++) {
2927         PetscInt sizecc = 0;
2928         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2929           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2930             sizecc++;
2931           }
2932         }
2933         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2934           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2935             cxadj[graph->queue[j]] = sizecc;
2936           }
2937         }
2938         sum += sizecc*sizecc;
2939       }
2940       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2941       sum = 0;
2942       for (i=0;i<pcis->n;i++) {
2943         PetscInt temp = cxadj[i];
2944         cxadj[i] = sum;
2945         sum += temp;
2946       }
2947       cxadj[pcis->n] = sum;
2948       for (i=0;i<graph->ncc;i++) {
2949         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2950           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2951             PetscInt k,sizecc = 0;
2952             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2953               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2954                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2955                 sizecc++;
2956               }
2957             }
2958           }
2959         }
2960       }
2961       if (pcis->n) {
2962         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2963       } else {
2964         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2965         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2966       }
2967       graph->xadj = 0;
2968       graph->adjncy = 0;
2969       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2970       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2971     }
2972   }
2973 
2974   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2975   vertex_size = 1;
2976   if (pcbddc->user_provided_isfordofs) {
2977     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2978       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2979       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2980         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2981         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2982       }
2983       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2984       pcbddc->n_ISForDofs = 0;
2985       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2986     }
2987     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2988     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2989   } else {
2990     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2991       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2992       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2993       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2994         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2995       }
2996     }
2997   }
2998 
2999   /* Setup of Graph */
3000   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3001     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3002   }
3003   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3004     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3005   }
3006   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
3007 
3008   /* Graph's connected components analysis */
3009   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3010 
3011   /* print some info to stdout */
3012   if (pcbddc->dbg_flag) {
3013     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
3014   }
3015 
3016   /* mark topography has done */
3017   pcbddc->recompute_topography = PETSC_FALSE;
3018   PetscFunctionReturn(0);
3019 }
3020 
3021 #undef __FUNCT__
3022 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
3023 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
3024 {
3025   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3026   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
3027   PetscErrorCode ierr;
3028 
3029   PetscFunctionBegin;
3030   n = 0;
3031   vertices = 0;
3032   if (pcbddc->ConstraintMatrix) {
3033     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
3034     for (i=0;i<local_primal_size;i++) {
3035       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3036       if (size_of_constraint == 1) n++;
3037       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3038     }
3039     if (vertices_idx) {
3040       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3041       n = 0;
3042       for (i=0;i<local_primal_size;i++) {
3043         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3044         if (size_of_constraint == 1) {
3045           vertices[n++]=row_cmat_indices[0];
3046         }
3047         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3048       }
3049     }
3050   }
3051   *n_vertices = n;
3052   if (vertices_idx) *vertices_idx = vertices;
3053   PetscFunctionReturn(0);
3054 }
3055 
3056 #undef __FUNCT__
3057 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3058 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3059 {
3060   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3061   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3062   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3063   PetscBT        touched;
3064   PetscErrorCode ierr;
3065 
3066     /* This function assumes that the number of local constraints per connected component
3067        is not greater than the number of nodes defined for the connected component
3068        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3069   PetscFunctionBegin;
3070   n = 0;
3071   constraints_index = 0;
3072   if (pcbddc->ConstraintMatrix) {
3073     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3074     max_size_of_constraint = 0;
3075     for (i=0;i<local_primal_size;i++) {
3076       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3077       if (size_of_constraint > 1) {
3078         n++;
3079       }
3080       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3081       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3082     }
3083     if (constraints_idx) {
3084       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3085       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3086       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3087       n = 0;
3088       for (i=0;i<local_primal_size;i++) {
3089         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3090         if (size_of_constraint > 1) {
3091           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3092           /* find first untouched local node */
3093           j = 0;
3094           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3095           min_index = row_cmat_global_indices[j];
3096           min_loc = j;
3097           /* search the minimum among nodes not yet touched on the connected component
3098              since there can be more than one constraint on a single cc */
3099           for (j=1;j<size_of_constraint;j++) {
3100             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3101               min_index = row_cmat_global_indices[j];
3102               min_loc = j;
3103             }
3104           }
3105           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3106           constraints_index[n++] = row_cmat_indices[min_loc];
3107         }
3108         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3109       }
3110       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3111       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3112     }
3113   }
3114   *n_constraints = n;
3115   if (constraints_idx) *constraints_idx = constraints_index;
3116   PetscFunctionReturn(0);
3117 }
3118 
3119 #undef __FUNCT__
3120 #define __FUNCT__ "PCBDDCSubsetNumbering"
3121 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3122 {
3123   Vec            local_vec,global_vec;
3124   IS             seqis,paris;
3125   VecScatter     scatter_ctx;
3126   PetscScalar    *array;
3127   PetscInt       *temp_global_dofs;
3128   PetscScalar    globalsum;
3129   PetscInt       i,j,s;
3130   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3131   PetscMPIInt    rank_prec_comm,size_prec_comm;
3132   PetscInt       *dof_sizes,*dof_displs;
3133   PetscBool      first_found;
3134   PetscErrorCode ierr;
3135 
3136   PetscFunctionBegin;
3137   /* mpi buffers */
3138   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3139   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3140   j = ( !rank_prec_comm ? size_prec_comm : 0);
3141   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3142   /* get maximum size of subset */
3143   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3144   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3145   max_local = 0;
3146   for (i=0;i<n_local_dofs;i++) {
3147     if (max_local < temp_global_dofs[i] ) {
3148       max_local = temp_global_dofs[i];
3149     }
3150   }
3151   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3152   max_global++;
3153   max_local = 0;
3154   for (i=0;i<n_local_dofs;i++) {
3155     if (max_local < local_dofs[i] ) {
3156       max_local = local_dofs[i];
3157     }
3158   }
3159   max_local++;
3160   /* allocate workspace */
3161   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3162   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3163   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3164   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3165   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3166   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3167   /* create scatter */
3168   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3169   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3170   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3171   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3172   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3173   /* init array */
3174   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3175   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3176   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3177   if (local_dofs_mult) {
3178     for (i=0;i<n_local_dofs;i++) {
3179       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3180     }
3181   } else {
3182     for (i=0;i<n_local_dofs;i++) {
3183       array[local_dofs[i]]=1.0;
3184     }
3185   }
3186   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3187   /* scatter into global vec and get total number of global dofs */
3188   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3189   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3190   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3191   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3192   /* Fill global_vec with cumulative function for global numbering */
3193   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3194   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3195   nlocals = 0;
3196   first_index = -1;
3197   first_found = PETSC_FALSE;
3198   for (i=0;i<s;i++) {
3199     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3200       first_found = PETSC_TRUE;
3201       first_index = i;
3202     }
3203     nlocals += (PetscInt)PetscRealPart(array[i]);
3204   }
3205   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3206   if (!rank_prec_comm) {
3207     dof_displs[0]=0;
3208     for (i=1;i<size_prec_comm;i++) {
3209       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3210     }
3211   }
3212   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3213   if (first_found) {
3214     array[first_index] += (PetscScalar)nlocals;
3215     old_index = first_index;
3216     for (i=first_index+1;i<s;i++) {
3217       if (PetscRealPart(array[i]) > 0.1) {
3218         array[i] += array[old_index];
3219         old_index = i;
3220       }
3221     }
3222   }
3223   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3224   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3225   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3226   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3227   /* get global ordering of local dofs */
3228   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3229   if (local_dofs_mult) {
3230     for (i=0;i<n_local_dofs;i++) {
3231       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3232     }
3233   } else {
3234     for (i=0;i<n_local_dofs;i++) {
3235       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3236     }
3237   }
3238   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3239   /* free workspace */
3240   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3241   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3242   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3243   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3244   /* return pointer to global ordering of local dofs */
3245   *global_numbering_subset = temp_global_dofs;
3246   PetscFunctionReturn(0);
3247 }
3248 
3249 #undef __FUNCT__
3250 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3251 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3252 {
3253   PetscInt       i,j;
3254   PetscScalar    *alphas;
3255   PetscErrorCode ierr;
3256 
3257   PetscFunctionBegin;
3258   /* this implements stabilized Gram-Schmidt */
3259   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3260   for (i=0;i<n;i++) {
3261     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3262     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3263     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3264   }
3265   ierr = PetscFree(alphas);CHKERRQ(ierr);
3266   PetscFunctionReturn(0);
3267 }
3268 
3269 #undef __FUNCT__
3270 #define __FUNCT__ "MatISGetSubassemblingPattern"
3271 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3272 {
3273   Mat             subdomain_adj;
3274   IS              new_ranks,ranks_send_to;
3275   MatPartitioning partitioner;
3276   Mat_IS          *matis;
3277   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3278   PetscInt        prank;
3279   PetscMPIInt     size,rank,color;
3280   PetscInt        *xadj,*adjncy,*oldranks;
3281   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3282   PetscInt        i,local_size,threshold=0;
3283   PetscErrorCode  ierr;
3284   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3285   PetscSubcomm    subcomm;
3286 
3287   PetscFunctionBegin;
3288   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3289   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3290   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3291 
3292   /* Get info on mapping */
3293   matis = (Mat_IS*)(mat->data);
3294   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3295   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3296 
3297   /* build local CSR graph of subdomains' connectivity */
3298   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3299   xadj[0] = 0;
3300   xadj[1] = PetscMax(n_neighs-1,0);
3301   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3302   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3303 
3304   if (threshold) {
3305     PetscInt xadj_count = 0;
3306     for (i=1;i<n_neighs;i++) {
3307       if (n_shared[i] > threshold) {
3308         adjncy[xadj_count] = neighs[i];
3309         adjncy_wgt[xadj_count] = n_shared[i];
3310         xadj_count++;
3311       }
3312     }
3313     xadj[1] = xadj_count;
3314   } else {
3315     if (xadj[1]) {
3316       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3317       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3318     }
3319   }
3320   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3321   if (use_square) {
3322     for (i=0;i<xadj[1];i++) {
3323       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3324     }
3325   }
3326   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3327 
3328   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3329 
3330   /*
3331     Restrict work on active processes only.
3332   */
3333   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3334   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3335   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3336   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3337   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3338   if (color) {
3339     ierr = PetscFree(xadj);CHKERRQ(ierr);
3340     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3341     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3342   } else {
3343     PetscInt coarsening_ratio;
3344     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3345     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3346     prank = rank;
3347     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3348     /*
3349     for (i=0;i<size;i++) {
3350       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3351     }
3352     */
3353     for (i=0;i<xadj[1];i++) {
3354       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3355     }
3356     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3357     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3358     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3359 
3360     /* Partition */
3361     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3362     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3363     if (use_vwgt) {
3364       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3365       v_wgt[0] = local_size;
3366       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3367     }
3368     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3369     coarsening_ratio = size/n_subdomains;
3370     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3371     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3372     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3373     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3374 
3375     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3376     if (contiguous) {
3377       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3378     } else {
3379       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3380     }
3381     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3382     /* clean up */
3383     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3384     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3385     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3386     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3387   }
3388   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3389 
3390   /* assemble parallel IS for sends */
3391   i = 1;
3392   if (color) i=0;
3393   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3394 
3395   /* get back IS */
3396   *is_sends = ranks_send_to;
3397   PetscFunctionReturn(0);
3398 }
3399 
3400 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3401 
3402 #undef __FUNCT__
3403 #define __FUNCT__ "MatISSubassemble"
3404 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3405 {
3406   Mat                    local_mat;
3407   Mat_IS                 *matis;
3408   IS                     is_sends_internal;
3409   PetscInt               rows,cols,new_local_rows;
3410   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3411   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3412   ISLocalToGlobalMapping l2gmap;
3413   PetscInt*              l2gmap_indices;
3414   const PetscInt*        is_indices;
3415   MatType                new_local_type;
3416   /* buffers */
3417   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3418   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3419   PetscInt               *recv_buffer_idxs_local;
3420   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3421   /* MPI */
3422   MPI_Comm               comm,comm_n;
3423   PetscSubcomm           subcomm;
3424   PetscMPIInt            n_sends,n_recvs,commsize;
3425   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3426   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3427   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3428   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3429   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3430   PetscErrorCode         ierr;
3431 
3432   PetscFunctionBegin;
3433   /* TODO: add missing checks */
3434   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3435   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3436   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3437   PetscValidLogicalCollectiveInt(mat,nis,7);
3438   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3439   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3440   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3441   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3442   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3443   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3444   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3445   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3446     PetscInt mrows,mcols,mnrows,mncols;
3447     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3448     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3449     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3450     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3451     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3452     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3453   }
3454   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3455   PetscValidLogicalCollectiveInt(mat,bs,0);
3456   /* prepare IS for sending if not provided */
3457   if (!is_sends) {
3458     PetscBool pcontig = PETSC_TRUE;
3459     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3460     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3461   } else {
3462     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3463     is_sends_internal = is_sends;
3464   }
3465 
3466   /* get pointer of MATIS data */
3467   matis = (Mat_IS*)mat->data;
3468 
3469   /* get comm */
3470   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3471 
3472   /* compute number of sends */
3473   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3474   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3475 
3476   /* compute number of receives */
3477   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3478   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3479   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3480   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3481   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3482   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3483   ierr = PetscFree(iflags);CHKERRQ(ierr);
3484 
3485   /* restrict comm if requested */
3486   subcomm = 0;
3487   destroy_mat = PETSC_FALSE;
3488   if (restrict_comm) {
3489     PetscMPIInt color,subcommsize;
3490 
3491     color = 0;
3492     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3493     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3494     subcommsize = commsize - subcommsize;
3495     /* check if reuse has been requested */
3496     if (reuse == MAT_REUSE_MATRIX) {
3497       if (*mat_n) {
3498         PetscMPIInt subcommsize2;
3499         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3500         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3501         comm_n = PetscObjectComm((PetscObject)*mat_n);
3502       } else {
3503         comm_n = PETSC_COMM_SELF;
3504       }
3505     } else { /* MAT_INITIAL_MATRIX */
3506       PetscMPIInt rank;
3507 
3508       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3509       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3510       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3511       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3512       comm_n = PetscSubcommChild(subcomm);
3513     }
3514     /* flag to destroy *mat_n if not significative */
3515     if (color) destroy_mat = PETSC_TRUE;
3516   } else {
3517     comm_n = comm;
3518   }
3519 
3520   /* prepare send/receive buffers */
3521   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3522   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3523   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3524   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3525   if (nis) {
3526     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3527   }
3528 
3529   /* Get data from local matrices */
3530   if (!isdense) {
3531     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3532     /* TODO: See below some guidelines on how to prepare the local buffers */
3533     /*
3534        send_buffer_vals should contain the raw values of the local matrix
3535        send_buffer_idxs should contain:
3536        - MatType_PRIVATE type
3537        - PetscInt        size_of_l2gmap
3538        - PetscInt        global_row_indices[size_of_l2gmap]
3539        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3540     */
3541   } else {
3542     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3543     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3544     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3545     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3546     send_buffer_idxs[1] = i;
3547     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3548     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3549     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3550     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3551     for (i=0;i<n_sends;i++) {
3552       ilengths_vals[is_indices[i]] = len*len;
3553       ilengths_idxs[is_indices[i]] = len+2;
3554     }
3555   }
3556   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3557   /* additional is (if any) */
3558   if (nis) {
3559     PetscMPIInt psum;
3560     PetscInt j;
3561     for (j=0,psum=0;j<nis;j++) {
3562       PetscInt plen;
3563       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3564       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3565       psum += len+1; /* indices + lenght */
3566     }
3567     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3568     for (j=0,psum=0;j<nis;j++) {
3569       PetscInt plen;
3570       const PetscInt *is_array_idxs;
3571       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3572       send_buffer_idxs_is[psum] = plen;
3573       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3574       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3575       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3576       psum += plen+1; /* indices + lenght */
3577     }
3578     for (i=0;i<n_sends;i++) {
3579       ilengths_idxs_is[is_indices[i]] = psum;
3580     }
3581     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3582   }
3583 
3584   buf_size_idxs = 0;
3585   buf_size_vals = 0;
3586   buf_size_idxs_is = 0;
3587   for (i=0;i<n_recvs;i++) {
3588     buf_size_idxs += (PetscInt)olengths_idxs[i];
3589     buf_size_vals += (PetscInt)olengths_vals[i];
3590     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3591   }
3592   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3593   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3594   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3595 
3596   /* get new tags for clean communications */
3597   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3598   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3599   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3600 
3601   /* allocate for requests */
3602   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3603   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3604   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3605   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3606   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3607   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3608 
3609   /* communications */
3610   ptr_idxs = recv_buffer_idxs;
3611   ptr_vals = recv_buffer_vals;
3612   ptr_idxs_is = recv_buffer_idxs_is;
3613   for (i=0;i<n_recvs;i++) {
3614     source_dest = onodes[i];
3615     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3616     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3617     ptr_idxs += olengths_idxs[i];
3618     ptr_vals += olengths_vals[i];
3619     if (nis) {
3620       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3621       ptr_idxs_is += olengths_idxs_is[i];
3622     }
3623   }
3624   for (i=0;i<n_sends;i++) {
3625     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3626     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3627     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3628     if (nis) {
3629       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3630     }
3631   }
3632   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3633   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3634 
3635   /* assemble new l2g map */
3636   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3637   ptr_idxs = recv_buffer_idxs;
3638   new_local_rows = 0;
3639   for (i=0;i<n_recvs;i++) {
3640     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3641     ptr_idxs += olengths_idxs[i];
3642   }
3643   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3644   ptr_idxs = recv_buffer_idxs;
3645   new_local_rows = 0;
3646   for (i=0;i<n_recvs;i++) {
3647     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3648     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3649     ptr_idxs += olengths_idxs[i];
3650   }
3651   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3652   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3653   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3654 
3655   /* infer new local matrix type from received local matrices type */
3656   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3657   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3658   if (n_recvs) {
3659     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3660     ptr_idxs = recv_buffer_idxs;
3661     for (i=0;i<n_recvs;i++) {
3662       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3663         new_local_type_private = MATAIJ_PRIVATE;
3664         break;
3665       }
3666       ptr_idxs += olengths_idxs[i];
3667     }
3668     switch (new_local_type_private) {
3669       case MATDENSE_PRIVATE:
3670         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3671           new_local_type = MATSEQAIJ;
3672           bs = 1;
3673         } else { /* if I receive only 1 dense matrix */
3674           new_local_type = MATSEQDENSE;
3675           bs = 1;
3676         }
3677         break;
3678       case MATAIJ_PRIVATE:
3679         new_local_type = MATSEQAIJ;
3680         bs = 1;
3681         break;
3682       case MATBAIJ_PRIVATE:
3683         new_local_type = MATSEQBAIJ;
3684         break;
3685       case MATSBAIJ_PRIVATE:
3686         new_local_type = MATSEQSBAIJ;
3687         break;
3688       default:
3689         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3690         break;
3691     }
3692   } else { /* by default, new_local_type is seqdense */
3693     new_local_type = MATSEQDENSE;
3694     bs = 1;
3695   }
3696 
3697   /* create MATIS object if needed */
3698   if (reuse == MAT_INITIAL_MATRIX) {
3699     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3700     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3701   } else {
3702     /* it also destroys the local matrices */
3703     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3704   }
3705   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3706   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3707 
3708   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3709 
3710   /* Global to local map of received indices */
3711   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3712   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3713   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3714 
3715   /* restore attributes -> type of incoming data and its size */
3716   buf_size_idxs = 0;
3717   for (i=0;i<n_recvs;i++) {
3718     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3719     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3720     buf_size_idxs += (PetscInt)olengths_idxs[i];
3721   }
3722   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3723 
3724   /* set preallocation */
3725   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3726   if (!newisdense) {
3727     PetscInt *new_local_nnz=0;
3728 
3729     ptr_vals = recv_buffer_vals;
3730     ptr_idxs = recv_buffer_idxs_local;
3731     if (n_recvs) {
3732       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3733     }
3734     for (i=0;i<n_recvs;i++) {
3735       PetscInt j;
3736       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3737         for (j=0;j<*(ptr_idxs+1);j++) {
3738           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3739         }
3740       } else {
3741         /* TODO */
3742       }
3743       ptr_idxs += olengths_idxs[i];
3744     }
3745     if (new_local_nnz) {
3746       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3747       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3748       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3749       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3750       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3751       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3752     } else {
3753       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3754     }
3755     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3756   } else {
3757     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3758   }
3759 
3760   /* set values */
3761   ptr_vals = recv_buffer_vals;
3762   ptr_idxs = recv_buffer_idxs_local;
3763   for (i=0;i<n_recvs;i++) {
3764     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3765       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3766       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3767       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3768       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3769       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3770     } else {
3771       /* TODO */
3772     }
3773     ptr_idxs += olengths_idxs[i];
3774     ptr_vals += olengths_vals[i];
3775   }
3776   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3777   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3778   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3779   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3780   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3781   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3782 
3783 #if 0
3784   if (!restrict_comm) { /* check */
3785     Vec       lvec,rvec;
3786     PetscReal infty_error;
3787 
3788     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3789     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3790     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3791     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3792     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3793     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3794     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3795     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3796     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3797   }
3798 #endif
3799 
3800   /* assemble new additional is (if any) */
3801   if (nis) {
3802     PetscInt **temp_idxs,*count_is,j,psum;
3803 
3804     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3805     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3806     ptr_idxs = recv_buffer_idxs_is;
3807     psum = 0;
3808     for (i=0;i<n_recvs;i++) {
3809       for (j=0;j<nis;j++) {
3810         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3811         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3812         psum += plen;
3813         ptr_idxs += plen+1; /* shift pointer to received data */
3814       }
3815     }
3816     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3817     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3818     for (i=1;i<nis;i++) {
3819       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3820     }
3821     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3822     ptr_idxs = recv_buffer_idxs_is;
3823     for (i=0;i<n_recvs;i++) {
3824       for (j=0;j<nis;j++) {
3825         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3826         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3827         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3828         ptr_idxs += plen+1; /* shift pointer to received data */
3829       }
3830     }
3831     for (i=0;i<nis;i++) {
3832       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3833       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3834       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3835     }
3836     ierr = PetscFree(count_is);CHKERRQ(ierr);
3837     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3838     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3839   }
3840   /* free workspace */
3841   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3842   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3843   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3844   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3845   if (isdense) {
3846     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3847     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3848   } else {
3849     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3850   }
3851   if (nis) {
3852     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3853     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3854   }
3855   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3856   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3857   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3858   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3859   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3860   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3861   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3862   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3863   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3864   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3865   ierr = PetscFree(onodes);CHKERRQ(ierr);
3866   if (nis) {
3867     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3868     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3869     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3870   }
3871   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3872   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3873     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3874     for (i=0;i<nis;i++) {
3875       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3876     }
3877   }
3878   PetscFunctionReturn(0);
3879 }
3880 
3881 /* temporary hack into ksp private data structure */
3882 #include <petsc-private/kspimpl.h>
3883 
3884 #undef __FUNCT__
3885 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3886 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3887 {
3888   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3889   PC_IS                  *pcis = (PC_IS*)pc->data;
3890   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3891   MatNullSpace           CoarseNullSpace=NULL;
3892   ISLocalToGlobalMapping coarse_islg;
3893   IS                     coarse_is,*isarray;
3894   PetscInt               i,im_active=-1,active_procs=-1;
3895   PetscInt               nis,nisdofs,nisneu;
3896   PC                     pc_temp;
3897   PCType                 coarse_pc_type;
3898   KSPType                coarse_ksp_type;
3899   PetscBool              multilevel_requested,multilevel_allowed;
3900   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3901   Mat                    t_coarse_mat_is;
3902   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3903   PetscMPIInt            all_procs;
3904   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3905   PetscBool              compute_vecs = PETSC_FALSE;
3906   PetscScalar            *array;
3907   PetscErrorCode         ierr;
3908 
3909   PetscFunctionBegin;
3910   /* Assign global numbering to coarse dofs */
3911   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3912     PetscInt ocoarse_size;
3913     compute_vecs = PETSC_TRUE;
3914     ocoarse_size = pcbddc->coarse_size;
3915     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3916     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3917     /* see if we can avoid some work */
3918     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3919       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3920         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3921         coarse_reuse = PETSC_FALSE;
3922       } else { /* we can safely reuse already computed coarse matrix */
3923         coarse_reuse = PETSC_TRUE;
3924       }
3925     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3926       coarse_reuse = PETSC_FALSE;
3927     }
3928     /* reset any subassembling information */
3929     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3930     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3931   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3932     coarse_reuse = PETSC_TRUE;
3933   }
3934 
3935   /* count "active" (i.e. with positive local size) and "void" processes */
3936   im_active = !!(pcis->n);
3937   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3938   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3939   void_procs = all_procs-active_procs;
3940   csin_type_simple = PETSC_TRUE;
3941   redist = PETSC_FALSE;
3942   if (pcbddc->current_level && void_procs) {
3943     csin_ml = PETSC_TRUE;
3944     ncoarse_ml = void_procs;
3945     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3946     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3947       csin_ds = PETSC_TRUE;
3948       ncoarse_ds = pcbddc->redistribute_coarse;
3949       redist = PETSC_TRUE;
3950     } else {
3951       csin_ds = PETSC_TRUE;
3952       ncoarse_ds = active_procs;
3953       redist = PETSC_TRUE;
3954     }
3955   } else {
3956     csin_ml = PETSC_FALSE;
3957     ncoarse_ml = all_procs;
3958     if (void_procs) {
3959       csin_ds = PETSC_TRUE;
3960       ncoarse_ds = void_procs;
3961       csin_type_simple = PETSC_FALSE;
3962     } else {
3963       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3964         csin_ds = PETSC_TRUE;
3965         ncoarse_ds = pcbddc->redistribute_coarse;
3966         redist = PETSC_TRUE;
3967       } else {
3968         csin_ds = PETSC_FALSE;
3969         ncoarse_ds = all_procs;
3970       }
3971     }
3972   }
3973 
3974   /*
3975     test if we can go multilevel: three conditions must be satisfied:
3976     - we have not exceeded the number of levels requested
3977     - we can actually subassemble the active processes
3978     - we can find a suitable number of MPI processes where we can place the subassembled problem
3979   */
3980   multilevel_allowed = PETSC_FALSE;
3981   multilevel_requested = PETSC_FALSE;
3982   if (pcbddc->current_level < pcbddc->max_levels) {
3983     multilevel_requested = PETSC_TRUE;
3984     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3985       multilevel_allowed = PETSC_FALSE;
3986     } else {
3987       multilevel_allowed = PETSC_TRUE;
3988     }
3989   }
3990   /* determine number of process partecipating to coarse solver */
3991   if (multilevel_allowed) {
3992     ncoarse = ncoarse_ml;
3993     csin = csin_ml;
3994     redist = PETSC_FALSE;
3995   } else {
3996     ncoarse = ncoarse_ds;
3997     csin = csin_ds;
3998   }
3999 
4000   /* creates temporary l2gmap and IS for coarse indexes */
4001   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4002   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4003 
4004   /* creates temporary MATIS object for coarse matrix */
4005   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4006   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4007   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4008   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4009 #if 0
4010   {
4011     PetscViewer viewer;
4012     char filename[256];
4013     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4014     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4015     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4016     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4017     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4018   }
4019 #endif
4020   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4021   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4022   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4023   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4024   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4025 
4026   /* compute dofs splitting and neumann boundaries for coarse dofs */
4027   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4028     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4029     const PetscInt         *idxs;
4030     ISLocalToGlobalMapping tmap;
4031 
4032     /* create map between primal indices (in local representative ordering) and local primal numbering */
4033     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4034     /* allocate space for temporary storage */
4035     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4036     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4037     /* allocate for IS array */
4038     nisdofs = pcbddc->n_ISForDofsLocal;
4039     nisneu = !!pcbddc->NeumannBoundariesLocal;
4040     nis = nisdofs + nisneu;
4041     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4042     /* dofs splitting */
4043     for (i=0;i<nisdofs;i++) {
4044       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4045       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4046       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4047       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4048       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4049       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4050       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4051       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4052     }
4053     /* neumann boundaries */
4054     if (pcbddc->NeumannBoundariesLocal) {
4055       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4056       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4057       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4058       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4059       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4060       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4061       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4062       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4063     }
4064     /* free memory */
4065     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4066     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4067     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4068   } else {
4069     nis = 0;
4070     nisdofs = 0;
4071     nisneu = 0;
4072     isarray = NULL;
4073   }
4074   /* destroy no longer needed map */
4075   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4076 
4077   /* restrict on coarse candidates (if needed) */
4078   coarse_mat_is = NULL;
4079   if (csin) {
4080     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4081       if (redist) {
4082         PetscMPIInt rank;
4083         PetscInt    spc,n_spc_p1,dest[1],destsize;
4084 
4085         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4086         spc = active_procs/ncoarse;
4087         n_spc_p1 = active_procs%ncoarse;
4088         if (im_active) {
4089           destsize = 1;
4090           if (rank > n_spc_p1*(spc+1)-1) {
4091             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4092           } else {
4093             dest[0] = rank/(spc+1);
4094           }
4095         } else {
4096           destsize = 0;
4097         }
4098         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4099       } else if (csin_type_simple) {
4100         PetscMPIInt rank;
4101         PetscInt    issize,isidx;
4102 
4103         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4104         if (im_active) {
4105           issize = 1;
4106           isidx = (PetscInt)rank;
4107         } else {
4108           issize = 0;
4109           isidx = -1;
4110         }
4111         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4112       } else { /* get a suitable subassembling pattern from MATIS code */
4113         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4114       }
4115 
4116       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4117       if (!redist || ncoarse <= void_procs) {
4118         PetscInt ncoarse_cand,tissize,*nisindices;
4119         PetscInt *coarse_candidates;
4120         const PetscInt* tisindices;
4121 
4122         /* get coarse candidates' ranks in pc communicator */
4123         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4124         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4125         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4126           if (!coarse_candidates[i]) {
4127             coarse_candidates[ncoarse_cand++]=i;
4128           }
4129         }
4130         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4131 
4132 
4133         if (pcbddc->dbg_flag) {
4134           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4135           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4136           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4137           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4138           for (i=0;i<ncoarse_cand;i++) {
4139             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4140           }
4141           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4142           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4143         }
4144         /* shift the pattern on coarse candidates */
4145         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4146         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4147         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4148         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4149         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4150         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4151         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4152       }
4153       if (pcbddc->dbg_flag) {
4154         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4155         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4156         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4157         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4158       }
4159     }
4160     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4161     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4162   } else {
4163     if (pcbddc->dbg_flag) {
4164       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4165       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4166       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4167     }
4168     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4169     coarse_mat_is = t_coarse_mat_is;
4170   }
4171 
4172   /* create local to global scatters for coarse problem */
4173   if (compute_vecs) {
4174     PetscInt lrows;
4175     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4176     if (coarse_mat_is) {
4177       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4178     } else {
4179       lrows = 0;
4180     }
4181     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4182     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4183     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4184     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4185     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4186   }
4187   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4188   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4189 
4190   /* set defaults for coarse KSP and PC */
4191   if (multilevel_allowed) {
4192     coarse_ksp_type = KSPRICHARDSON;
4193     coarse_pc_type = PCBDDC;
4194   } else {
4195     coarse_ksp_type = KSPPREONLY;
4196     coarse_pc_type = PCREDUNDANT;
4197   }
4198 
4199   /* print some info if requested */
4200   if (pcbddc->dbg_flag) {
4201     if (!multilevel_allowed) {
4202       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4203       if (multilevel_requested) {
4204         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4205       } else if (pcbddc->max_levels) {
4206         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4207       }
4208       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4209     }
4210   }
4211 
4212   /* create the coarse KSP object only once with defaults */
4213   if (coarse_mat_is) {
4214     MatReuse coarse_mat_reuse;
4215     PetscViewer dbg_viewer = NULL;
4216     if (pcbddc->dbg_flag) {
4217       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4218       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4219     }
4220     if (!pcbddc->coarse_ksp) {
4221       char prefix[256],str_level[16];
4222       size_t len;
4223       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4224       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4225       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4226       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4227       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4228       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4229       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4230       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4231       /* prefix */
4232       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4233       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4234       if (!pcbddc->current_level) {
4235         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4236         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4237       } else {
4238         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4239         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4240         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4241         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4242         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4243         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4244       }
4245       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4246       /* allow user customization */
4247       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4248       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4249     }
4250 
4251     /* get some info after set from options */
4252     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4253     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4254     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4255     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4256     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4257       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4258       isbddc = PETSC_FALSE;
4259     }
4260     if (isredundant) {
4261       KSP inner_ksp;
4262       PC inner_pc;
4263       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4264       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4265       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4266     }
4267 
4268     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4269     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4270     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4271     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4272     if (nisdofs) {
4273       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4274       for (i=0;i<nisdofs;i++) {
4275         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4276       }
4277     }
4278     if (nisneu) {
4279       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4280       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4281     }
4282 
4283     /* assemble coarse matrix */
4284     if (coarse_reuse) {
4285       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4286       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4287       coarse_mat_reuse = MAT_REUSE_MATRIX;
4288     } else {
4289       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4290     }
4291     if (isbddc || isnn) {
4292       if (pcbddc->coarsening_ratio > 1) {
4293         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4294           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4295           if (pcbddc->dbg_flag) {
4296             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4297             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4298             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4299             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4300           }
4301         }
4302         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4303       } else {
4304         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4305         coarse_mat = coarse_mat_is;
4306       }
4307     } else {
4308       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4309     }
4310     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4311 
4312     /* propagate symmetry info to coarse matrix */
4313     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4314     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4315 
4316     /* set operators */
4317     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4318     if (pcbddc->dbg_flag) {
4319       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4320     }
4321   } else { /* processes non partecipating to coarse solver (if any) */
4322     coarse_mat = 0;
4323   }
4324   ierr = PetscFree(isarray);CHKERRQ(ierr);
4325 #if 0
4326   {
4327     PetscViewer viewer;
4328     char filename[256];
4329     sprintf(filename,"coarse_mat.m");
4330     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4331     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4332     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4333     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4334   }
4335 #endif
4336 
4337   /* Compute coarse null space (special handling by BDDC only) */
4338   if (pcbddc->NullSpace) {
4339     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4340   }
4341 
4342   if (pcbddc->coarse_ksp) {
4343     Vec crhs,csol;
4344     PetscBool ispreonly;
4345     if (CoarseNullSpace) {
4346       if (isbddc) {
4347         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4348       } else {
4349         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4350       }
4351     }
4352     /* setup coarse ksp */
4353     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4354     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4355     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4356     /* hack */
4357     if (!csol) {
4358       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4359     }
4360     if (!crhs) {
4361       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4362     }
4363     /* Check coarse problem if in debug mode or if solving with an iterative method */
4364     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4365     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4366       KSP       check_ksp;
4367       KSPType   check_ksp_type;
4368       PC        check_pc;
4369       Vec       check_vec,coarse_vec;
4370       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4371       PetscInt  its;
4372       PetscBool compute_eigs;
4373       PetscReal *eigs_r,*eigs_c;
4374       PetscInt  neigs;
4375       const char *prefix;
4376 
4377       /* Create ksp object suitable for estimation of extreme eigenvalues */
4378       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4379       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4380       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4381       if (ispreonly) {
4382         check_ksp_type = KSPPREONLY;
4383         compute_eigs = PETSC_FALSE;
4384       } else {
4385         check_ksp_type = KSPGMRES;
4386         compute_eigs = PETSC_TRUE;
4387       }
4388       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4389       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4390       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4391       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4392       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4393       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4394       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4395       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4396       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4397       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4398       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4399       /* create random vec */
4400       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4401       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4402       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4403       if (CoarseNullSpace) {
4404         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4405       }
4406       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4407       /* solve coarse problem */
4408       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4409       if (CoarseNullSpace) {
4410         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4411       }
4412       /* set eigenvalue estimation if preonly has not been requested */
4413       if (compute_eigs) {
4414         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4415         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4416         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4417         lambda_max = eigs_r[neigs-1];
4418         lambda_min = eigs_r[0];
4419         if (pcbddc->use_coarse_estimates) {
4420           if (lambda_max>lambda_min) {
4421             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4422             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4423           }
4424         }
4425       }
4426 
4427       /* check coarse problem residual error */
4428       if (pcbddc->dbg_flag) {
4429         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4430         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4431         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4432         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4433         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4434         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4435         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4436         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4437         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4438         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4439         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4440         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4441         if (compute_eigs) {
4442           PetscReal lambda_max_s,lambda_min_s;
4443           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4444           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4445           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4446           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4447           for (i=0;i<neigs;i++) {
4448             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4449           }
4450         }
4451         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4452         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4453       }
4454       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4455       if (compute_eigs) {
4456         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4457         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4458       }
4459     }
4460   }
4461   /* print additional info */
4462   if (pcbddc->dbg_flag) {
4463     /* waits until all processes reaches this point */
4464     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4465     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4466     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4467   }
4468 
4469   /* free memory */
4470   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4471   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4472   PetscFunctionReturn(0);
4473 }
4474 
4475 #undef __FUNCT__
4476 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4477 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4478 {
4479   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4480   PC_IS*         pcis = (PC_IS*)pc->data;
4481   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4482   PetscInt       i,coarse_size=0;
4483   PetscInt       *local_primal_indices=NULL;
4484   PetscErrorCode ierr;
4485 
4486   PetscFunctionBegin;
4487   /* Compute global number of coarse dofs */
4488   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4489     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4490   }
4491   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4492 
4493   /* check numbering */
4494   if (pcbddc->dbg_flag) {
4495     PetscScalar coarsesum,*array;
4496     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4497 
4498     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4499     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4500     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4501     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4502     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4503     for (i=0;i<pcbddc->local_primal_size;i++) {
4504       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4505     }
4506     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4507     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4508     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4509     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4510     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4511     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4512     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4513     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4514     for (i=0;i<pcis->n;i++) {
4515       if (array[i] == 1.0) {
4516         set_error = PETSC_TRUE;
4517         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4518       }
4519     }
4520     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4521     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4522     for (i=0;i<pcis->n;i++) {
4523       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4524     }
4525     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4526     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4527     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4528     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4529     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4530     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4531     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4532       PetscInt *gidxs;
4533 
4534       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4535       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4536       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4537       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4538       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4539       for (i=0;i<pcbddc->local_primal_size;i++) {
4540         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
4541       }
4542       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4543       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4544     }
4545     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4546     if (set_error_reduced) {
4547       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4548     }
4549   }
4550   /* get back data */
4551   *coarse_size_n = coarse_size;
4552   *local_primal_indices_n = local_primal_indices;
4553   PetscFunctionReturn(0);
4554 }
4555 
4556 #undef __FUNCT__
4557 #define __FUNCT__ "PCBDDCGlobalToLocal"
4558 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4559 {
4560   IS             localis_t;
4561   PetscInt       i,lsize,*idxs,n;
4562   PetscScalar    *vals;
4563   PetscErrorCode ierr;
4564 
4565   PetscFunctionBegin;
4566   /* get indices in local ordering exploiting local to global map */
4567   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4568   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4569   for (i=0;i<lsize;i++) vals[i] = 1.0;
4570   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4571   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4572   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4573   if (idxs) { /* multilevel guard */
4574     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4575   }
4576   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4577   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4578   ierr = PetscFree(vals);CHKERRQ(ierr);
4579   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4580   /* now compute set in local ordering */
4581   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4582   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4583   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4584   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4585   for (i=0,lsize=0;i<n;i++) {
4586     if (PetscRealPart(vals[i]) > 0.5) {
4587       lsize++;
4588     }
4589   }
4590   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4591   for (i=0,lsize=0;i<n;i++) {
4592     if (PetscRealPart(vals[i]) > 0.5) {
4593       idxs[lsize++] = i;
4594     }
4595   }
4596   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4597   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4598   *localis = localis_t;
4599   PetscFunctionReturn(0);
4600 }
4601 
4602 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4603 #undef __FUNCT__
4604 #define __FUNCT__ "PCBDDCMatMult_Private"
4605 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4606 {
4607   PCBDDCChange_ctx change_ctx;
4608   PetscErrorCode   ierr;
4609 
4610   PetscFunctionBegin;
4611   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4612   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4613   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4614   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4615   PetscFunctionReturn(0);
4616 }
4617 
4618 #undef __FUNCT__
4619 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4620 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4621 {
4622   PCBDDCChange_ctx change_ctx;
4623   PetscErrorCode   ierr;
4624 
4625   PetscFunctionBegin;
4626   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4627   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4628   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4629   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4630   PetscFunctionReturn(0);
4631 }
4632 
4633 #undef __FUNCT__
4634 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4635 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4636 {
4637   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4638   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4639   PetscInt            *used_xadj,*used_adjncy;
4640   PetscBool           free_used_adj;
4641   PetscErrorCode      ierr;
4642 
4643   PetscFunctionBegin;
4644   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4645   free_used_adj = PETSC_FALSE;
4646   if (pcbddc->sub_schurs_layers == -1) {
4647     used_xadj = NULL;
4648     used_adjncy = NULL;
4649   } else {
4650     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4651       used_xadj = pcbddc->mat_graph->xadj;
4652       used_adjncy = pcbddc->mat_graph->adjncy;
4653     } else if (pcbddc->computed_rowadj) {
4654       used_xadj = pcbddc->mat_graph->xadj;
4655       used_adjncy = pcbddc->mat_graph->adjncy;
4656     } else {
4657       PetscBool      flg_row=PETSC_FALSE;
4658       const PetscInt *xadj,*adjncy;
4659       PetscInt       nvtxs;
4660 
4661       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4662       if (flg_row) {
4663         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4664         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4665         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4666         free_used_adj = PETSC_TRUE;
4667       } else {
4668         pcbddc->sub_schurs_layers = -1;
4669         used_xadj = NULL;
4670         used_adjncy = NULL;
4671       }
4672       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4673     }
4674   }
4675   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4676 
4677   /* free adjacency */
4678   if (free_used_adj) {
4679     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4680   }
4681   PetscFunctionReturn(0);
4682 }
4683 
4684 #undef __FUNCT__
4685 #define __FUNCT__ "PCBDDCInitSubSchurs"
4686 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4687 {
4688   PC_IS               *pcis=(PC_IS*)pc->data;
4689   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4690   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4691   PCBDDCGraph         graph;
4692   Mat                 S_j;
4693   PetscErrorCode      ierr;
4694 
4695   PetscFunctionBegin;
4696   /* attach interface graph for determining subsets */
4697   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4698     IS verticesIS;
4699 
4700     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4701     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4702     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4703     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4704     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4705     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4706 /*
4707     if (pcbddc->dbg_flag) {
4708       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4709     }
4710 */
4711   } else {
4712     graph = pcbddc->mat_graph;
4713   }
4714 
4715   /* Create Schur complement matrix */
4716   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4717   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4718 
4719   /* sub_schurs init */
4720   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4721   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4722   /* free graph struct */
4723   if (pcbddc->sub_schurs_rebuild) {
4724     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4725   }
4726   PetscFunctionReturn(0);
4727 }
4728