xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision e496cd5dfac3e9278b1c8f7c7cf0cc2eb88dc331)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
60   nmax = PetscMax(nmin,nmax);
61   if (pcbddc->adaptive_threshold > 1.0) {
62     thresh = 1.0/pcbddc->adaptive_threshold;
63   } else {
64     thresh = 1.0;
65   }
66 
67   /* allocate lapack workspace */
68   cum = cum2 = 0;
69   maxneigs = 0;
70   for (i=0;i<sub_schurs->n_subs;i++) {
71     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
72       PetscInt n,subset_size;
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size*n;
76       cum2 += n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80 
81   if (mss) {
82     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
83       PetscBLASInt B_itype = 1;
84       PetscBLASInt B_N = mss;
85       PetscReal    zero = 0.0;
86       PetscReal    eps = 0.0; /* dlamch? */
87 
88       B_lwork = -1;
89       S = NULL;
90       St = NULL;
91       eigs = NULL;
92       eigv = NULL;
93       B_iwork = NULL;
94       B_ifail = NULL;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
112     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
116                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
117 #if defined(PETSC_USE_COMPLEX)
118   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
119 #endif
120   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
121   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
122                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
123                       nv+cum,&pcbddc->adaptive_constraints_idxs,
124                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
125   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
126 
127   maxneigs = 0;
128   cum = cum2 = cumarray = 0;
129   if (sub_schurs->is_Ej_com) {
130     const PetscInt *idxs;
131 
132     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
133     for (cum=0;cum<nv;cum++) {
134       pcbddc->adaptive_constraints_n[cum] = 1;
135       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
136       pcbddc->adaptive_constraints_ptrs[cum] = cum;
137       pcbddc->adaptive_constraints_data[cum] = 1.0;
138     }
139     cum2 = cum;
140     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
141   }
142 
143   if (mss) { /* multilevel */
144     if (pcbddc->use_deluxe_scaling) {
145       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
146     } else {
147       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
148     }
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
150   }
151 
152   for (i=0;i<sub_schurs->n_subs;i++) {
153     PetscInt j,subset_size;
154 
155     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
156     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
157       const PetscInt *idxs;
158       PetscScalar    one = 1.0,scalar_zero = 0.0;
159       PetscReal      zero=0.0;
160       PetscBLASInt   B_N;
161 
162       /* S should be copied since we need it for deluxe scaling */
163       if (sub_schurs->is_hermitian) {
164         PetscInt j;
165         for (j=0;j<subset_size;j++) {
166           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
167         }
168         for (j=0;j<subset_size;j++) {
169           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
170         }
171       } else {
172         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
173         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
174       }
175       /* is always this the right matrix? */
176       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
177 
178       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
179       /* St = Starray+cumarray; */
180 
181       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
182         PetscBLASInt B_itype = 1;
183         PetscBLASInt B_IL = 1, B_IU;
184         PetscReal    eps = -1.0; /* dlamch? */
185         PetscInt     nmin_s;
186 
187         /* ask for eigenvalues lower than thresh */
188         if (pcbddc->dbg_flag) {
189           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d.\n",i,sub_schurs->n_subs);
190         }
191         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
192         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
193 #if defined(PETSC_USE_COMPLEX)
194         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
195 #else
196         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
197 #endif
198         ierr = PetscFPTrapPop();CHKERRQ(ierr);
199         if (B_ierr) {
200           if (B_ierr < 0 ) {
201             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
202           } else if (B_ierr <= B_N) {
203             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
204           } else {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
206           }
207         }
208 
209         if (B_neigs > nmax) {
210           if (pcbddc->dbg_flag) {
211             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
212           }
213           B_neigs = nmax;
214         }
215 
216         nmin_s = PetscMin(nmin,B_N);
217         if (B_neigs < nmin_s) {
218           PetscBLASInt B_neigs2;
219 
220           B_IL = B_neigs + 1;
221           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
222           if (pcbddc->dbg_flag) {
223             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
224           }
225           if (sub_schurs->is_hermitian) {
226             PetscInt j;
227             for (j=0;j<subset_size;j++) {
228               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
229             }
230             for (j=0;j<subset_size;j++) {
231               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
232             }
233           } else {
234             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
235             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
236           }
237           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
238 #if defined(PETSC_USE_COMPLEX)
239           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
240 #else
241           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
242 #endif
243           ierr = PetscFPTrapPop();CHKERRQ(ierr);
244           B_neigs += B_neigs2;
245         }
246         if (B_ierr) {
247           if (B_ierr < 0 ) {
248             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
249           } else if (B_ierr <= B_N) {
250             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
251           } else {
252             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
253           }
254         }
255         if (pcbddc->dbg_flag) {
256           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
257           for (j=0;j<B_neigs;j++) {
258             if (eigs[j] == 0.0) {
259               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
260             } else {
261               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1.0/eigs[j]);
262             }
263           }
264         }
265       } else {
266           /* TODO */
267       }
268       maxneigs = PetscMax(B_neigs,maxneigs);
269       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
270 
271       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
272       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N));
273       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
274 
275       if (pcbddc->dbg_flag > 1) {
276         PetscInt ii;
277         for (ii=0;ii<B_neigs;ii++) {
278           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
279           for (j=0;j<B_N;j++) {
280             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]);
281           }
282         }
283       }
284       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
285       for (j=0;j<B_neigs;j++) {
286 #if 0
287         {
288           PetscBLASInt Blas_N,Blas_one = 1.0;
289           PetscScalar norm;
290           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
291           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
292           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
293             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
294           } else {
295             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
296           }
297           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
298         }
299 #endif
300         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
301         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
302         cum2 += subset_size;
303       }
304       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
305     }
306     /* shift for next computation */
307     cumarray += subset_size*subset_size;
308   }
309   if (pcbddc->dbg_flag) {
310     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
311   }
312   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
313   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
314 
315   if (mss) {
316     if (pcbddc->use_deluxe_scaling) {
317       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
318     } else {
319       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
320     }
321     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
322   }
323   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
324 #if defined(PETSC_USE_COMPLEX)
325   ierr = PetscFree(rwork);CHKERRQ(ierr);
326 #endif
327   if (pcbddc->dbg_flag) {
328     PetscInt maxneigs_r;
329     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
330     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
331   }
332   PetscFunctionReturn(0);
333 }
334 
335 #undef __FUNCT__
336 #define __FUNCT__ "PCBDDCSetUpSolvers"
337 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
338 {
339   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
340   PetscScalar    *coarse_submat_vals;
341   PetscErrorCode ierr;
342 
343   PetscFunctionBegin;
344   /* Setup local scatters R_to_B and (optionally) R_to_D */
345   /* PCBDDCSetUpLocalWorkVectors should be called first! */
346   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
347 
348   /* Setup local neumann solver ksp_R */
349   /* PCBDDCSetUpLocalScatters should be called first! */
350   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
351 
352   /* Change global null space passed in by the user if change of basis has been requested */
353   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
354     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
355   }
356 
357   /*
358      Setup local correction and local part of coarse basis.
359      Gives back the dense local part of the coarse matrix in column major ordering
360   */
361   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
362 
363   /* Compute total number of coarse nodes and setup coarse solver */
364   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
365 
366   /* free */
367   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
368   PetscFunctionReturn(0);
369 }
370 
371 #undef __FUNCT__
372 #define __FUNCT__ "PCBDDCResetCustomization"
373 PetscErrorCode PCBDDCResetCustomization(PC pc)
374 {
375   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
376   PetscErrorCode ierr;
377 
378   PetscFunctionBegin;
379   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
380   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
381   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
383   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
384   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
385   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
386   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
387   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
388   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
389   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "PCBDDCResetTopography"
395 PetscErrorCode PCBDDCResetTopography(PC pc)
396 {
397   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
398   PetscErrorCode ierr;
399 
400   PetscFunctionBegin;
401   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
403   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
404   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
405   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "PCBDDCResetSolvers"
411 PetscErrorCode PCBDDCResetSolvers(PC pc)
412 {
413   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
414   PetscScalar    *array;
415   PetscErrorCode ierr;
416 
417   PetscFunctionBegin;
418   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
419   if (pcbddc->coarse_phi_B) {
420     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
421     ierr = PetscFree(array);CHKERRQ(ierr);
422   }
423   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
424   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
425   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
426   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
427   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
428   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
429   if (pcbddc->local_auxmat2) {
430     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
431     ierr = PetscFree(array);CHKERRQ(ierr);
432   }
433   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
434   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
436   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
437   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
440   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
443   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
444   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
446   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
447   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
448   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
454 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
455 {
456   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
457   PC_IS          *pcis = (PC_IS*)pc->data;
458   VecType        impVecType;
459   PetscInt       n_constraints,n_R,old_size;
460   PetscErrorCode ierr;
461 
462   PetscFunctionBegin;
463   if (!pcbddc->ConstraintMatrix) {
464     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
465   }
466   /* get sizes */
467   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
468   n_R = pcis->n-pcbddc->n_actual_vertices;
469   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
470   /* local work vectors (try to avoid unneeded work)*/
471   /* R nodes */
472   old_size = -1;
473   if (pcbddc->vec1_R) {
474     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
475   }
476   if (n_R != old_size) {
477     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
478     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
479     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
480     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
481     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
482     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
483   }
484   /* local primal dofs */
485   old_size = -1;
486   if (pcbddc->vec1_P) {
487     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
488   }
489   if (pcbddc->local_primal_size != old_size) {
490     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
491     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
492     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
493     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
494   }
495   /* local explicit constraints */
496   old_size = -1;
497   if (pcbddc->vec1_C) {
498     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
499   }
500   if (n_constraints && n_constraints != old_size) {
501     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
502     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
503     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
504     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
505   }
506   PetscFunctionReturn(0);
507 }
508 
509 #undef __FUNCT__
510 #define __FUNCT__ "PCBDDCSetUpCorrection"
511 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
512 {
513   PetscErrorCode         ierr;
514   /* pointers to pcis and pcbddc */
515   PC_IS*                 pcis = (PC_IS*)pc->data;
516   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
517   /* submatrices of local problem */
518   Mat                    A_RV,A_VR,A_VV;
519   /* submatrices of local coarse problem */
520   Mat                    S_VV,S_CV,S_VC,S_CC;
521   /* working matrices */
522   Mat                    C_CR;
523   /* additional working stuff */
524   PC                     pc_R;
525   Mat                    F;
526   PetscBool              isLU,isCHOL,isILU;
527 
528   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
529   PetscScalar            *work;
530   PetscInt               *idx_V_B;
531   PetscInt               n,n_vertices,n_constraints;
532   PetscInt               i,n_R,n_D,n_B;
533   PetscBool              unsymmetric_check;
534   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
535   MatType                impMatType;
536   /* some shortcuts to scalars */
537   PetscScalar            one=1.0,m_one=-1.0;
538 
539   PetscFunctionBegin;
540   /* get number of vertices (corners plus constraints with change of basis)
541      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
542   n_vertices = pcbddc->n_actual_vertices;
543   n_constraints = pcbddc->local_primal_size-n_vertices;
544   /* Set Non-overlapping dimensions */
545   n_B = pcis->n_B; n_D = pcis->n - n_B;
546   n_R = pcis->n-n_vertices;
547 
548   /* Set types for local objects needed by BDDC precondtioner */
549   impMatType = MATSEQDENSE;
550 
551   /* vertices in boundary numbering */
552   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
553   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
554   if (i != n_vertices) {
555     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
556   }
557 
558   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
559   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
560   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
561   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
562   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
563   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
564   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
565   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
566   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
567   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
568 
569   unsymmetric_check = PETSC_FALSE;
570   /* allocate workspace */
571   n = 0;
572   if (n_constraints) {
573     n += n_R*n_constraints;
574   }
575   if (n_vertices) {
576     n = PetscMax(2*n_R*n_vertices,n);
577   }
578   if (!pcbddc->issym) {
579     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
580     unsymmetric_check = PETSC_TRUE;
581   }
582   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
583 
584   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
585   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
586   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
587   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
588   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
589   if (isLU || isILU || isCHOL) {
590     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
591   } else {
592     F = NULL;
593   }
594 
595   /* Precompute stuffs needed for preprocessing and application of BDDC*/
596   if (n_constraints) {
597     Mat M1,M2,M3;
598     IS  is_aux;
599     /* see if we can save some allocations */
600     if (pcbddc->local_auxmat2) {
601       PetscInt on_R,on_constraints;
602       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
603       if (on_R != n_R || on_constraints != n_constraints) {
604         PetscScalar *marray;
605 
606         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
607         ierr = PetscFree(marray);CHKERRQ(ierr);
608         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
609         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
610       }
611     }
612     /* auxiliary matrices */
613     if (!pcbddc->local_auxmat2) {
614       PetscScalar *marray;
615 
616       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
617       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
618       marray += n_R*n_constraints;
619       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
620     }
621 
622     /* Extract constraints on R nodes: C_{CR}  */
623     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
624     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
625     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
626 
627     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
628     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
629     for (i=0;i<n_constraints;i++) {
630       const PetscScalar *row_cmat_values;
631       const PetscInt    *row_cmat_indices;
632       PetscInt          size_of_constraint,j;
633 
634       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
635       for (j=0;j<size_of_constraint;j++) {
636         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
637       }
638       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
639     }
640     if (F) {
641       Mat B;
642 
643       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
644       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
645       ierr = MatDestroy(&B);CHKERRQ(ierr);
646     } else {
647       PetscScalar *xarray;
648       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
649       for (i=0;i<n_constraints;i++) {
650         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
651         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
652         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
653         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
654         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
655       }
656       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
657     }
658 
659     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
660     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
661     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
662     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
663     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
664     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
665     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
666     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
667     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
668     ierr = MatDestroy(&M2);CHKERRQ(ierr);
669     ierr = MatDestroy(&M3);CHKERRQ(ierr);
670     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
671     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
672     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
673     ierr = MatDestroy(&M1);CHKERRQ(ierr);
674   }
675   /* Get submatrices from subdomain matrix */
676   if (n_vertices) {
677     Mat       newmat;
678     IS        is_aux;
679     PetscInt  ibs,mbs;
680     PetscBool issbaij;
681 
682     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
683     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
684     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
685     if (ibs != mbs) { /* need to convert to SEQAIJ */
686       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
687       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
688       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
689       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
690       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
691     } else {
692       /* this is safe */
693       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
694       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
695       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
696         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
697         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
698         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
699         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
700         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
701       } else {
702         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
703         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
704       }
705     }
706     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
707   }
708 
709   /* Matrix of coarse basis functions (local) */
710   if (pcbddc->coarse_phi_B) {
711     PetscInt on_B,on_primal,on_D=n_D;
712     if (pcbddc->coarse_phi_D) {
713       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
714     }
715     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
716     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
717       PetscScalar *marray;
718 
719       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
720       ierr = PetscFree(marray);CHKERRQ(ierr);
721       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
722       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
723       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
724       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
725     }
726   }
727 
728   if (!pcbddc->coarse_phi_B) {
729     PetscScalar *marray;
730 
731     n = n_B*pcbddc->local_primal_size;
732     if (pcbddc->switch_static || pcbddc->dbg_flag) {
733       n += n_D*pcbddc->local_primal_size;
734     }
735     if (!pcbddc->issym) {
736       n *= 2;
737     }
738     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
739     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
740     n = n_B*pcbddc->local_primal_size;
741     if (pcbddc->switch_static || pcbddc->dbg_flag) {
742       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
743       n += n_D*pcbddc->local_primal_size;
744     }
745     if (!pcbddc->issym) {
746       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
747       if (pcbddc->switch_static || pcbddc->dbg_flag) {
748         n = n_B*pcbddc->local_primal_size;
749         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
750       }
751     } else {
752       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
753       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
754       if (pcbddc->switch_static || pcbddc->dbg_flag) {
755         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
756         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
757       }
758     }
759   }
760   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
761   /* vertices */
762   if (n_vertices) {
763 
764     if (n_R) {
765       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
766       PetscBLASInt B_N,B_one = 1;
767       PetscScalar  *x,*y;
768 
769       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
770       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
771       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
772       if (F) {
773         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
774       } else {
775         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
776         for (i=0;i<n_vertices;i++) {
777           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
778           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
779           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
780           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
781           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
782         }
783         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
784       }
785       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
786       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
787       if (n_constraints) {
788         Mat B;
789         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
790         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
791         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
792         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
793         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
794         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
795         ierr = MatDestroy(&B);CHKERRQ(ierr);
796       }
797       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
798       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
799       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
800       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
801       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
802       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
803       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
804       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
805       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
806       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
807       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
808       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
809     } else {
810       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
811       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
812     }
813     /* coarse basis functions */
814     for (i=0;i<n_vertices;i++) {
815       PetscScalar *y;
816 
817       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
818       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
819       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
820       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
822       y[n_B*i+idx_V_B[i]] = 1.0;
823       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
824       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
825 
826       if (pcbddc->switch_static || pcbddc->dbg_flag) {
827         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
828         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
829         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
831         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
832         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
833       }
834       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
835     }
836     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
837     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
838   }
839 
840   if (n_constraints) {
841     Mat B;
842 
843     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
844     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
845     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
846     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
847     if (n_vertices) {
848       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
849     }
850     ierr = MatDestroy(&B);CHKERRQ(ierr);
851     /* coarse basis functions */
852     for (i=0;i<n_constraints;i++) {
853       PetscScalar *y;
854 
855       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
856       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
857       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
858       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
860       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
861       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
862 
863       if (pcbddc->switch_static || pcbddc->dbg_flag) {
864         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
865         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
866         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
869         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
870       }
871       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
872     }
873   }
874 
875   /* compute other basis functions for non-symmetric problems */
876   if (!pcbddc->issym) {
877     Mat B,X;
878 
879     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
880 
881     if (n_constraints) {
882       Mat S_CCT,B_C;
883 
884       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
885       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
886       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
887       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
888       if (n_vertices) {
889         Mat B_V,S_VCT;
890 
891         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
892         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
893         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
894         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
895         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
896       }
897       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
898     }
899     if (n_vertices && n_R) {
900       Mat          A_VRT;
901       PetscBLASInt B_N,B_one = 1;
902 
903       if (!n_constraints) { /* if there are no constraints, reset work */
904         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
905       }
906       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
907       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
908       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
909       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
910       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
911     }
912 
913     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
914     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
915       for (i=0;i<pcbddc->local_primal_size;i++) {
916         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
917         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
918         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
919         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
920         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
921       }
922     } else {
923       for (i=0;i<pcbddc->local_primal_size;i++) {
924         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
925         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
926         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
927         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
928         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
929       }
930     }
931     ierr = MatDestroy(&B);CHKERRQ(ierr);
932     /* coarse basis functions */
933     for (i=0;i<pcbddc->local_primal_size;i++) {
934       PetscScalar *y;
935 
936       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
937       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
938       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
939       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941       if (i<n_vertices) {
942         y[n_B*i+idx_V_B[i]] = 1.0;
943       }
944       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
945       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
946 
947       if (pcbddc->switch_static || pcbddc->dbg_flag) {
948         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
949         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
950         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
952         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
953         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
954       }
955       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
956     }
957     ierr = MatDestroy(&X);CHKERRQ(ierr);
958   }
959 
960   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
961   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
962   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
963   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
964   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
965   /* Checking coarse_sub_mat and coarse basis functios */
966   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
967   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
968   if (pcbddc->dbg_flag) {
969     Mat         coarse_sub_mat;
970     Mat         AUXMAT,TM1,TM2,TM3,TM4;
971     Mat         coarse_phi_D,coarse_phi_B;
972     Mat         coarse_psi_D,coarse_psi_B;
973     Mat         A_II,A_BB,A_IB,A_BI;
974     MatType     checkmattype=MATSEQAIJ;
975     PetscReal   real_value;
976 
977     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
978     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
979     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
980     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
981     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
982     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
983     if (unsymmetric_check) {
984       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
985       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
986     }
987     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
988 
989     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
990     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
991     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
992     if (unsymmetric_check) {
993       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
994       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
995       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
996       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
997       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
998       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
999       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1000       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1001       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1002       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1003       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1004       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1005     } else {
1006       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1007       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1008       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1009       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1010       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1011       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1012       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1013       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1014     }
1015     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1016     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1017     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1018     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1019     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1020     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
1021 
1022     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1023     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1024     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1025     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1026     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1027     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1028     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1029     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1030     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1031     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1032     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1033     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1034     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1035     if (unsymmetric_check) {
1036       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1037       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1038     }
1039     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1040   }
1041 
1042   /* free memory */
1043   ierr = PetscFree(work);CHKERRQ(ierr);
1044   if (n_vertices) {
1045     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1046   }
1047   if (n_constraints) {
1048     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1049   }
1050   /* get back data */
1051   *coarse_submat_vals_n = coarse_submat_vals;
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 #undef __FUNCT__
1056 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1057 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1058 {
1059   Mat            *work_mat;
1060   IS             isrow_s,iscol_s;
1061   PetscBool      rsorted,csorted;
1062   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1063   PetscErrorCode ierr;
1064 
1065   PetscFunctionBegin;
1066   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1067   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1068   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1069   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1070 
1071   if (!rsorted) {
1072     const PetscInt *idxs;
1073     PetscInt *idxs_sorted,i;
1074 
1075     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1076     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1077     for (i=0;i<rsize;i++) {
1078       idxs_perm_r[i] = i;
1079     }
1080     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1081     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1082     for (i=0;i<rsize;i++) {
1083       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1084     }
1085     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1086     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1087   } else {
1088     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1089     isrow_s = isrow;
1090   }
1091 
1092   if (!csorted) {
1093     if (isrow == iscol) {
1094       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1095       iscol_s = isrow_s;
1096     } else {
1097       const PetscInt *idxs;
1098       PetscInt *idxs_sorted,i;
1099 
1100       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1101       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1102       for (i=0;i<csize;i++) {
1103         idxs_perm_c[i] = i;
1104       }
1105       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1106       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1107       for (i=0;i<csize;i++) {
1108         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1109       }
1110       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1111       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1112     }
1113   } else {
1114     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1115     iscol_s = iscol;
1116   }
1117 
1118   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1119 
1120   if (!rsorted || !csorted) {
1121     Mat      new_mat;
1122     IS       is_perm_r,is_perm_c;
1123 
1124     if (!rsorted) {
1125       PetscInt *idxs_r,i;
1126       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1127       for (i=0;i<rsize;i++) {
1128         idxs_r[idxs_perm_r[i]] = i;
1129       }
1130       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1131       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1132     } else {
1133       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1134     }
1135     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1136 
1137     if (!csorted) {
1138       if (isrow_s == iscol_s) {
1139         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1140         is_perm_c = is_perm_r;
1141       } else {
1142         PetscInt *idxs_c,i;
1143         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1144         for (i=0;i<csize;i++) {
1145           idxs_c[idxs_perm_c[i]] = i;
1146         }
1147         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1148         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1149       }
1150     } else {
1151       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1152     }
1153     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1154 
1155     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1156     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1157     work_mat[0] = new_mat;
1158     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1159     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1160   }
1161 
1162   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1163   *B = work_mat[0];
1164   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1165   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1166   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1172 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1173 {
1174   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1175   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1176   Mat            new_mat;
1177   IS             is_local,is_global;
1178   PetscInt       local_size;
1179   PetscBool      isseqaij;
1180   PetscErrorCode ierr;
1181 
1182   PetscFunctionBegin;
1183   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1184   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1185   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1186   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1187   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1188   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1189   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1190 
1191   /* check */
1192   if (pcbddc->dbg_flag) {
1193     Vec       x,x_change;
1194     PetscReal error;
1195 
1196     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1197     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1198     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1199     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1200     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1201     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1202     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1203     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1204     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1205     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1206     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1207     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1208     ierr = VecDestroy(&x);CHKERRQ(ierr);
1209     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1210   }
1211 
1212   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1213   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1214   if (isseqaij) {
1215     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1216   } else {
1217     Mat work_mat;
1218     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1219     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1220     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1221   }
1222   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1223 #if !defined(PETSC_USE_COMPLEX)
1224   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1225 #endif
1226   /*
1227   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1228   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1229   */
1230   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1236 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1237 {
1238   PC_IS*         pcis = (PC_IS*)(pc->data);
1239   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1240   IS             is_aux1,is_aux2;
1241   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1242   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1243   PetscInt       vbs,bs;
1244   PetscBT        bitmask;
1245   PetscErrorCode ierr;
1246 
1247   PetscFunctionBegin;
1248   /*
1249     No need to setup local scatters if
1250       - primal space is unchanged
1251         AND
1252       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1253         AND
1254       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1255   */
1256   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1257     PetscFunctionReturn(0);
1258   }
1259   /* destroy old objects */
1260   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1261   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1262   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1263   /* Set Non-overlapping dimensions */
1264   n_B = pcis->n_B; n_D = pcis->n - n_B;
1265   n_vertices = pcbddc->n_actual_vertices;
1266   /* create auxiliary bitmask */
1267   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1268   for (i=0;i<n_vertices;i++) {
1269     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1270   }
1271 
1272   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1273   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1274   for (i=0, n_R=0; i<pcis->n; i++) {
1275     if (!PetscBTLookup(bitmask,i)) {
1276       idx_R_local[n_R] = i;
1277       n_R++;
1278     }
1279   }
1280 
1281   /* Block code */
1282   vbs = 1;
1283   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1284   if (bs>1 && !(n_vertices%bs)) {
1285     PetscBool is_blocked = PETSC_TRUE;
1286     PetscInt  *vary;
1287     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1288     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1289     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1290     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1291     for (i=0; i<n_vertices/bs; i++) {
1292       if (vary[i]!=0 && vary[i]!=bs) {
1293         is_blocked = PETSC_FALSE;
1294         break;
1295       }
1296     }
1297     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1298       vbs = bs;
1299       for (i=0;i<n_R/vbs;i++) {
1300         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1301       }
1302     }
1303     ierr = PetscFree(vary);CHKERRQ(ierr);
1304   }
1305   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1306   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1307 
1308   /* print some info if requested */
1309   if (pcbddc->dbg_flag) {
1310     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1311     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1312     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1313     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1314     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1315     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1316     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1317     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1318   }
1319 
1320   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1321   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1322   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1323   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1324   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1325   for (i=0; i<n_D; i++) {
1326     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1327   }
1328   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1329   for (i=0, j=0; i<n_R; i++) {
1330     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1331       aux_array1[j++] = i;
1332     }
1333   }
1334   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1335   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1336   for (i=0, j=0; i<n_B; i++) {
1337     if (!PetscBTLookup(bitmask,is_indices[i])) {
1338       aux_array2[j++] = i;
1339     }
1340   }
1341   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1342   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1343   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1344   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1345   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1346 
1347   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1348     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1349     for (i=0, j=0; i<n_R; i++) {
1350       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1351         aux_array1[j++] = i;
1352       }
1353     }
1354     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1355     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1356     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1357   }
1358   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1359   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1366 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1367 {
1368   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1369   PC_IS          *pcis = (PC_IS*)pc->data;
1370   PC             pc_temp;
1371   Mat            A_RR;
1372   MatReuse       reuse;
1373   PetscScalar    m_one = -1.0;
1374   PetscReal      value;
1375   PetscInt       n_D,n_R,ibs,mbs;
1376   PetscBool      use_exact,use_exact_reduced,issbaij;
1377   PetscErrorCode ierr;
1378   /* prefixes stuff */
1379   char           dir_prefix[256],neu_prefix[256],str_level[16];
1380   size_t         len;
1381 
1382   PetscFunctionBegin;
1383 
1384   /* compute prefixes */
1385   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1386   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1387   if (!pcbddc->current_level) {
1388     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1389     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1390     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1391     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1392   } else {
1393     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1394     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1395     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1396     len -= 15; /* remove "pc_bddc_coarse_" */
1397     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1398     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1399     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1400     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1401     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1402     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1403     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1404     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1405   }
1406 
1407   /* DIRICHLET PROBLEM */
1408   if (dirichlet) {
1409     if (pcbddc->issym) {
1410       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1411     }
1412     /* Matrix for Dirichlet problem is pcis->A_II */
1413     n_D = pcis->n - pcis->n_B;
1414     if (!pcbddc->ksp_D) { /* create object if not yet build */
1415       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1416       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1417       /* default */
1418       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1419       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1420       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1421       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1422       if (issbaij) {
1423         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1424       } else {
1425         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1426       }
1427       /* Allow user's customization */
1428       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1429       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1430     }
1431     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1432     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1433     if (!n_D) {
1434       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1435       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1436     }
1437     /* Set Up KSP for Dirichlet problem of BDDC */
1438     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1439     /* set ksp_D into pcis data */
1440     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1441     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1442     pcis->ksp_D = pcbddc->ksp_D;
1443   }
1444 
1445   /* NEUMANN PROBLEM */
1446   A_RR = 0;
1447   if (neumann) {
1448     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1449     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1450     if (pcbddc->ksp_R) { /* already created ksp */
1451       PetscInt nn_R;
1452       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1453       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1454       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1455       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1456         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1457         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1458         reuse = MAT_INITIAL_MATRIX;
1459       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1460         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1461           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1462           reuse = MAT_INITIAL_MATRIX;
1463         } else { /* safe to reuse the matrix */
1464           reuse = MAT_REUSE_MATRIX;
1465         }
1466       }
1467       /* last check */
1468       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1469         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1470         reuse = MAT_INITIAL_MATRIX;
1471       }
1472     } else { /* first time, so we need to create the matrix */
1473       reuse = MAT_INITIAL_MATRIX;
1474     }
1475     /* extract A_RR */
1476     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1477     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1478     if (ibs != mbs) {
1479       Mat newmat;
1480       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1481       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1482       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1483     } else {
1484       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1485     }
1486     if (pcbddc->issym) {
1487       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1488     }
1489     if (!pcbddc->ksp_R) { /* create object if not present */
1490       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1491       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1492       /* default */
1493       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1494       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1495       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1496       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1497       if (issbaij) {
1498         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1499       } else {
1500         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1501       }
1502       /* Allow user's customization */
1503       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1504       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1505     }
1506     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1507     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1508     if (!n_R) {
1509       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1510       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1511     }
1512     /* Set Up KSP for Neumann problem of BDDC */
1513     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1514   }
1515 
1516   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1517   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1518     if (pcbddc->dbg_flag) {
1519       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1520       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1521       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1522     }
1523     if (dirichlet) { /* Dirichlet */
1524       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1525       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1526       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1527       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1528       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1529       /* need to be adapted? */
1530       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1531       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1532       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1533       /* print info */
1534       if (pcbddc->dbg_flag) {
1535         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1536         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1537       }
1538       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1539         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1540       }
1541     }
1542     if (neumann) { /* Neumann */
1543       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1544       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1545       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1546       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1547       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1548       /* need to be adapted? */
1549       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1550       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1551       /* print info */
1552       if (pcbddc->dbg_flag) {
1553         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1554         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1555       }
1556       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1557         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1558       }
1559     }
1560   }
1561   /* free Neumann problem's matrix */
1562   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1568 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1569 {
1570   PetscErrorCode ierr;
1571   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1572 
1573   PetscFunctionBegin;
1574   if (applytranspose) {
1575     if (pcbddc->local_auxmat1) {
1576       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1577       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1578     }
1579     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1580   } else {
1581     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1582     if (pcbddc->local_auxmat1) {
1583       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1584       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1585     }
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1591 #undef __FUNCT__
1592 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1593 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1594 {
1595   PetscErrorCode ierr;
1596   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1597   PC_IS*            pcis = (PC_IS*)  (pc->data);
1598   const PetscScalar zero = 0.0;
1599 
1600   PetscFunctionBegin;
1601   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1602   if (applytranspose) {
1603     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1604     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1605   } else {
1606     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1607     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1608   }
1609   /* start communications from local primal nodes to rhs of coarse solver */
1610   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1611   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1612   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613 
1614   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1615   /* TODO remove null space when doing multilevel */
1616   if (pcbddc->coarse_ksp) {
1617     Vec rhs,sol;
1618 
1619     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1620     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1621     if (applytranspose) {
1622       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1623     } else {
1624       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1625     }
1626   }
1627 
1628   /* Local solution on R nodes */
1629   if (pcis->n) {
1630     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1631     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1632     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1633     if (pcbddc->switch_static) {
1634       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1635       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1636     }
1637     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1638     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1639     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1640     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641     if (pcbddc->switch_static) {
1642       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1643       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644     }
1645   }
1646 
1647   /* communications from coarse sol to local primal nodes */
1648   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1649   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1650 
1651   /* Sum contributions from two levels */
1652   if (applytranspose) {
1653     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1654     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1655   } else {
1656     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1657     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1658   }
1659   PetscFunctionReturn(0);
1660 }
1661 
1662 #undef __FUNCT__
1663 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1664 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1665 {
1666   PetscErrorCode ierr;
1667   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1668   PetscScalar    *array;
1669   Vec            from,to;
1670 
1671   PetscFunctionBegin;
1672   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1673     from = pcbddc->coarse_vec;
1674     to = pcbddc->vec1_P;
1675     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1676       Vec tvec;
1677 
1678       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1679       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1680       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1681       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1682       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1683       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1684     }
1685   } else { /* from local to global -> put data in coarse right hand side */
1686     from = pcbddc->vec1_P;
1687     to = pcbddc->coarse_vec;
1688   }
1689   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1690   PetscFunctionReturn(0);
1691 }
1692 
1693 #undef __FUNCT__
1694 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1695 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1696 {
1697   PetscErrorCode ierr;
1698   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1699   PetscScalar    *array;
1700   Vec            from,to;
1701 
1702   PetscFunctionBegin;
1703   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1704     from = pcbddc->coarse_vec;
1705     to = pcbddc->vec1_P;
1706   } else { /* from local to global -> put data in coarse right hand side */
1707     from = pcbddc->vec1_P;
1708     to = pcbddc->coarse_vec;
1709   }
1710   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1711   if (smode == SCATTER_FORWARD) {
1712     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1713       Vec tvec;
1714 
1715       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1716       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1717       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1718       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1719     }
1720   } else {
1721     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1722      ierr = VecResetArray(from);CHKERRQ(ierr);
1723     }
1724   }
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 /* uncomment for testing purposes */
1729 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1730 #undef __FUNCT__
1731 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1732 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1733 {
1734   PetscErrorCode    ierr;
1735   PC_IS*            pcis = (PC_IS*)(pc->data);
1736   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1737   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1738   /* one and zero */
1739   PetscScalar       one=1.0,zero=0.0;
1740   /* space to store constraints and their local indices */
1741   PetscScalar       *temp_quadrature_constraint;
1742   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1743   /* iterators */
1744   PetscInt          i,j,k,total_counts,temp_start_ptr;
1745   /* BLAS integers */
1746   PetscBLASInt      lwork,lierr;
1747   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1748   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1749   /* reuse */
1750   PetscInt          olocal_primal_size;
1751   PetscInt          *oprimal_indices_local_idxs;
1752   /* change of basis */
1753   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1754   PetscBool         boolforchange,qr_needed;
1755   PetscBT           touched,change_basis,qr_needed_idx;
1756   /* auxiliary stuff */
1757   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1758   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1759   PetscScalar       *temp_quadrature_constraint_work=NULL;
1760   /* some quantities */
1761   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1762   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1763 
1764   PetscFunctionBegin;
1765   /* Destroy Mat objects computed previously */
1766   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1767   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1768 
1769   /* print some info */
1770   if (pcbddc->dbg_flag) {
1771     IS       vertices;
1772     PetscInt nv,nedges,nfaces;
1773     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1774     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1775     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1776     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1777     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1778     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1779     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1780     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1781     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1782   }
1783 
1784   if (!pcbddc->adaptive_selection) {
1785     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1786     MatNullSpace nearnullsp;
1787     const Vec    *nearnullvecs;
1788     Vec          *localnearnullsp;
1789     PetscScalar  *array;
1790     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1791     PetscBool    nnsp_has_cnst;
1792     /* LAPACK working arrays for SVD or POD */
1793     PetscBool    skip_lapack;
1794     PetscScalar  *work;
1795     PetscReal    *singular_vals;
1796 #if defined(PETSC_USE_COMPLEX)
1797     PetscReal    *rwork;
1798 #endif
1799 #if defined(PETSC_MISSING_LAPACK_GESVD)
1800     PetscScalar  *temp_basis,*correlation_mat;
1801 #else
1802     PetscBLASInt dummy_int=1;
1803     PetscScalar  dummy_scalar=1.;
1804 #endif
1805 
1806     /* Get index sets for faces, edges and vertices from graph */
1807     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1808     /* free unneeded index sets */
1809     if (!pcbddc->use_vertices) {
1810       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1811     }
1812     if (!pcbddc->use_edges) {
1813       for (i=0;i<n_ISForEdges;i++) {
1814         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1815       }
1816       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1817       n_ISForEdges = 0;
1818     }
1819     if (!pcbddc->use_faces) {
1820       for (i=0;i<n_ISForFaces;i++) {
1821         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1822       }
1823       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1824       n_ISForFaces = 0;
1825     }
1826     /* HACKS (the following two blocks of code) */
1827     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1828       pcbddc->use_change_of_basis = PETSC_TRUE;
1829       if (!ISForEdges) {
1830         pcbddc->use_change_on_faces = PETSC_TRUE;
1831       }
1832     }
1833     if (pcbddc->NullSpace) {
1834       /* use_change_of_basis should be consistent among processors */
1835       PetscBool tbool[2],gbool[2];
1836       tbool [0] = pcbddc->use_change_of_basis;
1837       tbool [1] = pcbddc->use_change_on_faces;
1838       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1839       pcbddc->use_change_of_basis = gbool[0];
1840       pcbddc->use_change_on_faces = gbool[1];
1841     }
1842 
1843     /* check if near null space is attached to global mat */
1844     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1845     if (nearnullsp) {
1846       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1847       /* remove any stored info */
1848       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1849       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1850       /* store information for BDDC solver reuse */
1851       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1852       pcbddc->onearnullspace = nearnullsp;
1853       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1854       for (i=0;i<nnsp_size;i++) {
1855         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1856       }
1857     } else { /* if near null space is not provided BDDC uses constants by default */
1858       nnsp_size = 0;
1859       nnsp_has_cnst = PETSC_TRUE;
1860     }
1861     /* get max number of constraints on a single cc */
1862     max_constraints = nnsp_size;
1863     if (nnsp_has_cnst) max_constraints++;
1864 
1865     /*
1866          Evaluate maximum storage size needed by the procedure
1867          - temp_indices will contain start index of each constraint stored as follows
1868          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1869          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1870          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1871                                                                                                                                                            */
1872     total_counts = n_ISForFaces+n_ISForEdges;
1873     total_counts *= max_constraints;
1874     n_vertices = 0;
1875     if (ISForVertices) {
1876       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1877     }
1878     total_counts += n_vertices;
1879     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1880     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1881     total_counts = 0;
1882     max_size_of_constraint = 0;
1883     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1884       if (i<n_ISForEdges) {
1885         used_IS = &ISForEdges[i];
1886       } else {
1887         used_IS = &ISForFaces[i-n_ISForEdges];
1888       }
1889       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1890       total_counts += j;
1891       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1892     }
1893     total_counts *= max_constraints;
1894     total_counts += n_vertices;
1895     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1896     /* get local part of global near null space vectors */
1897     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1898     for (k=0;k<nnsp_size;k++) {
1899       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1900       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1901       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1902     }
1903 
1904     /* whether or not to skip lapack calls */
1905     skip_lapack = PETSC_TRUE;
1906     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1907 
1908     /* allocate some auxiliary stuff */
1909     if (!skip_lapack || pcbddc->use_qr_single) {
1910       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1911     } else {
1912       gidxs = NULL;
1913       permutation = NULL;
1914       temp_indices_to_constraint_work = NULL;
1915       temp_quadrature_constraint_work = NULL;
1916     }
1917 
1918     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1919     if (!skip_lapack) {
1920       PetscScalar temp_work;
1921 
1922 #if defined(PETSC_MISSING_LAPACK_GESVD)
1923       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1924       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1925       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1926       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1927 #if defined(PETSC_USE_COMPLEX)
1928       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1929 #endif
1930       /* now we evaluate the optimal workspace using query with lwork=-1 */
1931       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1932       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1933       lwork = -1;
1934       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1935 #if !defined(PETSC_USE_COMPLEX)
1936       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1937 #else
1938       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1939 #endif
1940       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1941       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1942 #else /* on missing GESVD */
1943       /* SVD */
1944       PetscInt max_n,min_n;
1945       max_n = max_size_of_constraint;
1946       min_n = max_constraints;
1947       if (max_size_of_constraint < max_constraints) {
1948         min_n = max_size_of_constraint;
1949         max_n = max_constraints;
1950       }
1951       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1952 #if defined(PETSC_USE_COMPLEX)
1953       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1954 #endif
1955       /* now we evaluate the optimal workspace using query with lwork=-1 */
1956       lwork = -1;
1957       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1958       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1959       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1960       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1961 #if !defined(PETSC_USE_COMPLEX)
1962       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1963 #else
1964       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1965 #endif
1966       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1967       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1968 #endif /* on missing GESVD */
1969       /* Allocate optimal workspace */
1970       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1971       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1972     }
1973     /* Now we can loop on constraining sets */
1974     total_counts = 0;
1975     temp_indices[0] = 0;
1976     /* vertices */
1977     if (ISForVertices) {
1978       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1979       if (nnsp_has_cnst) { /* consider all vertices */
1980         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1981         for (i=0;i<n_vertices;i++) {
1982           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1983           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1984           total_counts++;
1985         }
1986       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1987         PetscBool used_vertex;
1988         for (i=0;i<n_vertices;i++) {
1989           used_vertex = PETSC_FALSE;
1990           k = 0;
1991           while (!used_vertex && k<nnsp_size) {
1992             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1993             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1994               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1995               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1996               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1997               total_counts++;
1998               used_vertex = PETSC_TRUE;
1999             }
2000             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2001             k++;
2002           }
2003         }
2004       }
2005       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2006       n_vertices = total_counts;
2007     }
2008 
2009     /* edges and faces */
2010     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2011       if (ncc<n_ISForEdges) {
2012         used_IS = &ISForEdges[ncc];
2013         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2014       } else {
2015         used_IS = &ISForFaces[ncc-n_ISForEdges];
2016         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2017       }
2018       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2019       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2020       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2021       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2022       /* change of basis should not be performed on local periodic nodes */
2023       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2024       if (nnsp_has_cnst) {
2025         PetscScalar quad_value;
2026         temp_constraints++;
2027         if (!pcbddc->use_nnsp_true) {
2028           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2029         } else {
2030           quad_value = 1.0;
2031         }
2032         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2033         for (j=0;j<size_of_constraint;j++) {
2034           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2035         }
2036         /* sort by global ordering if using lapack subroutines (not needed!) */
2037         if (!skip_lapack || pcbddc->use_qr_single) {
2038           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2039           for (j=0;j<size_of_constraint;j++) {
2040             permutation[j]=j;
2041           }
2042           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2043           for (j=0;j<size_of_constraint;j++) {
2044             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2045           }
2046           for (j=0;j<size_of_constraint;j++) {
2047             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2048             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2049           }
2050           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2051           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2052         }
2053         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2054         total_counts++;
2055       }
2056       for (k=0;k<nnsp_size;k++) {
2057         PetscReal real_value;
2058         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2059         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2060         for (j=0;j<size_of_constraint;j++) {
2061           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2062         }
2063         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2064         /* check if array is null on the connected component */
2065         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2066         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2067         if (real_value > 0.0) { /* keep indices and values */
2068           /* sort by global ordering if using lapack subroutines */
2069           if (!skip_lapack || pcbddc->use_qr_single) {
2070             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2071             for (j=0;j<size_of_constraint;j++) {
2072               permutation[j]=j;
2073             }
2074             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2075             for (j=0;j<size_of_constraint;j++) {
2076               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2077               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2078             }
2079             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2080             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2081           }
2082           temp_constraints++;
2083           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2084           total_counts++;
2085         }
2086       }
2087       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2088       valid_constraints = temp_constraints;
2089       if (!pcbddc->use_nnsp_true && temp_constraints) {
2090         if (temp_constraints == 1) { /* just normalize the constraint */
2091           PetscScalar norm;
2092           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2093           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2094           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2095           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2096         } else { /* perform SVD */
2097           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2098 
2099 #if defined(PETSC_MISSING_LAPACK_GESVD)
2100           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2101              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2102              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2103                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2104                 from that computed using LAPACKgesvd
2105              -> This is due to a different computation of eigenvectors in LAPACKheev
2106              -> The quality of the POD-computed basis will be the same */
2107           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2108           /* Store upper triangular part of correlation matrix */
2109           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2110           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2111           for (j=0;j<temp_constraints;j++) {
2112             for (k=0;k<j+1;k++) {
2113               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2114             }
2115           }
2116           /* compute eigenvalues and eigenvectors of correlation matrix */
2117           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2118           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2119 #if !defined(PETSC_USE_COMPLEX)
2120           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2121 #else
2122           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2123 #endif
2124           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2125           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2126           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2127           j = 0;
2128           while (j < temp_constraints && singular_vals[j] < tol) j++;
2129           total_counts = total_counts-j;
2130           valid_constraints = temp_constraints-j;
2131           /* scale and copy POD basis into used quadrature memory */
2132           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2133           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2134           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2135           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2136           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2137           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2138           if (j<temp_constraints) {
2139             PetscInt ii;
2140             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2141             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2142             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2143             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2144             for (k=0;k<temp_constraints-j;k++) {
2145               for (ii=0;ii<size_of_constraint;ii++) {
2146                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2147               }
2148             }
2149           }
2150 #else  /* on missing GESVD */
2151           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2152           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2153           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2154           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2155 #if !defined(PETSC_USE_COMPLEX)
2156           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2157 #else
2158           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2159 #endif
2160           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2161           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2162           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2163           k = temp_constraints;
2164           if (k > size_of_constraint) k = size_of_constraint;
2165           j = 0;
2166           while (j < k && singular_vals[k-j-1] < tol) j++;
2167           valid_constraints = k-j;
2168           total_counts = total_counts-temp_constraints+valid_constraints;
2169 #endif /* on missing GESVD */
2170         }
2171       }
2172       /* setting change_of_basis flag is safe now */
2173       if (boolforchange) {
2174         for (j=0;j<valid_constraints;j++) {
2175           PetscBTSet(change_basis,total_counts-j-1);
2176         }
2177       }
2178     }
2179     /* free workspace */
2180     if (!skip_lapack || pcbddc->use_qr_single) {
2181       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2182     }
2183     if (!skip_lapack) {
2184       ierr = PetscFree(work);CHKERRQ(ierr);
2185 #if defined(PETSC_USE_COMPLEX)
2186       ierr = PetscFree(rwork);CHKERRQ(ierr);
2187 #endif
2188       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2189 #if defined(PETSC_MISSING_LAPACK_GESVD)
2190       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2191       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2192 #endif
2193     }
2194     for (k=0;k<nnsp_size;k++) {
2195       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2196     }
2197     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2198     /* free index sets of faces, edges and vertices */
2199     for (i=0;i<n_ISForFaces;i++) {
2200       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2201     }
2202     if (n_ISForFaces) {
2203       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2204     }
2205     for (i=0;i<n_ISForEdges;i++) {
2206       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2207     }
2208     if (n_ISForEdges) {
2209       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2210     }
2211     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2212   } else {
2213     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2214     PetscInt        cum = 0;
2215 
2216     total_counts = 0;
2217     n_vertices = 0;
2218     if (sub_schurs->is_Ej_com) {
2219       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2220     }
2221     max_constraints = 0;
2222     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2223       total_counts += pcbddc->adaptive_constraints_n[i];
2224       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2225     }
2226     temp_indices = pcbddc->adaptive_constraints_ptrs;
2227     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2228     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2229 
2230 #if 0
2231     printf("Found %d totals\n",total_counts);
2232     for (i=0;i<total_counts;i++) {
2233       printf("const %d, start %d",i,temp_indices[i]);
2234       printf(" end %d:\n",temp_indices[i+1]);
2235       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2236         printf("  idxs %d",temp_indices_to_constraint[j]);
2237         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2238       }
2239     }
2240     for (i=0;i<n_vertices;i++) {
2241       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2242     }
2243     for (i=0;i<sub_schurs->n_subs;i++) {
2244       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2245     }
2246 #endif
2247 
2248     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2249     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2250     /* Change of basis */
2251     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2252     if (pcbddc->use_change_of_basis) {
2253       cum = n_vertices;
2254       for (i=0;i<sub_schurs->n_subs;i++) {
2255         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2256           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2257             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2258           }
2259         }
2260         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2261       }
2262     }
2263   }
2264 
2265   /* map temp_indices_to_constraint in boundary numbering */
2266   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2267   if (i != temp_indices[total_counts]) {
2268     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2269   }
2270 
2271   /* set quantities in pcbddc data structure and store previous primal size */
2272   /* n_vertices defines the number of subdomain corners in the primal space */
2273   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2274   olocal_primal_size = pcbddc->local_primal_size;
2275   pcbddc->local_primal_size = total_counts;
2276   pcbddc->n_vertices = n_vertices;
2277   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2278 
2279   /* Create constraint matrix */
2280   /* The constraint matrix is used to compute the l2g map of primal dofs */
2281   /* so we need to set it up properly either with or without change of basis */
2282   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2283   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2284   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2285   /* array to compute a local numbering of constraints : vertices first then constraints */
2286   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2287   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2288   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2289   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2290   /* auxiliary stuff for basis change */
2291   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2292   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2293 
2294   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2295   total_primal_vertices=0;
2296   for (i=0;i<pcbddc->local_primal_size;i++) {
2297     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2298     if (size_of_constraint == 1) {
2299       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2300       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2301       aux_primal_minloc[total_primal_vertices]=0;
2302       total_primal_vertices++;
2303     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2304       PetscInt min_loc,min_index;
2305       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2306       /* find first untouched local node */
2307       k = 0;
2308       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2309       min_index = global_indices[k];
2310       min_loc = k;
2311       /* search the minimum among global nodes already untouched on the cc */
2312       for (k=1;k<size_of_constraint;k++) {
2313         /* there can be more than one constraint on a single connected component */
2314         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2315           min_index = global_indices[k];
2316           min_loc = k;
2317         }
2318       }
2319       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2320       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2321       aux_primal_minloc[total_primal_vertices]=min_loc;
2322       total_primal_vertices++;
2323     }
2324   }
2325   /* determine if a QR strategy is needed for change of basis */
2326   qr_needed = PETSC_FALSE;
2327   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2328   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2329     if (PetscBTLookup(change_basis,i)) {
2330       if (!pcbddc->use_qr_single) {
2331         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2332         j = 0;
2333         for (k=0;k<size_of_constraint;k++) {
2334           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2335             j++;
2336           }
2337         }
2338         /* found more than one primal dof on the cc */
2339         if (j > 1) {
2340           PetscBTSet(qr_needed_idx,i);
2341           qr_needed = PETSC_TRUE;
2342         }
2343       } else {
2344         PetscBTSet(qr_needed_idx,i);
2345         qr_needed = PETSC_TRUE;
2346       }
2347     }
2348   }
2349   /* free workspace */
2350   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2351 
2352   /* permute indices in order to have a sorted set of vertices */
2353   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2354 
2355   /* nonzero structure of constraint matrix */
2356   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2357   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2358   j=total_primal_vertices;
2359   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2360     if (!PetscBTLookup(change_basis,i)) {
2361       nnz[j]=temp_indices[i+1]-temp_indices[i];
2362       j++;
2363     }
2364   }
2365   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2366   ierr = PetscFree(nnz);CHKERRQ(ierr);
2367   /* set values in constraint matrix */
2368   for (i=0;i<total_primal_vertices;i++) {
2369     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2370   }
2371   total_counts = total_primal_vertices;
2372   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2373     if (!PetscBTLookup(change_basis,i)) {
2374       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2375       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2376       total_counts++;
2377     }
2378   }
2379   /* assembling */
2380   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2381   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2382   /*
2383   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2384   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2385   */
2386   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2387   if (pcbddc->use_change_of_basis) {
2388     /* dual and primal dofs on a single cc */
2389     PetscInt     dual_dofs,primal_dofs;
2390     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2391     PetscInt     primal_counter;
2392     /* working stuff for GEQRF */
2393     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2394     PetscBLASInt lqr_work;
2395     /* working stuff for UNGQR */
2396     PetscScalar  *gqr_work,lgqr_work_t;
2397     PetscBLASInt lgqr_work;
2398     /* working stuff for TRTRS */
2399     PetscScalar  *trs_rhs;
2400     PetscBLASInt Blas_NRHS;
2401     /* pointers for values insertion into change of basis matrix */
2402     PetscInt     *start_rows,*start_cols;
2403     PetscScalar  *start_vals;
2404     /* working stuff for values insertion */
2405     PetscBT      is_primal;
2406     /* matrix sizes */
2407     PetscInt     global_size,local_size;
2408     /* temporary change of basis */
2409     Mat          localChangeOfBasisMatrix;
2410     /* extra space for debugging */
2411     PetscScalar  *dbg_work;
2412 
2413     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2414     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2415     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2416     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2417     /* nonzeros for local mat */
2418     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2419     for (i=0;i<pcis->n;i++) nnz[i]=1;
2420     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2421       if (PetscBTLookup(change_basis,i)) {
2422         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2423         if (PetscBTLookup(qr_needed_idx,i)) {
2424           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2425         } else {
2426           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2427           /* get local primal index on the cc */
2428           j = 0;
2429           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2430           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2431         }
2432       }
2433     }
2434     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2435     ierr = PetscFree(nnz);CHKERRQ(ierr);
2436     /* Set initial identity in the matrix */
2437     for (i=0;i<pcis->n;i++) {
2438       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2439     }
2440 
2441     if (pcbddc->dbg_flag) {
2442       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2443       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2444     }
2445 
2446 
2447     /* Now we loop on the constraints which need a change of basis */
2448     /*
2449        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2450        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2451 
2452        Basic blocks of change of basis matrix T computed by
2453 
2454           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2455 
2456             | 1        0   ...        0         s_1/S |
2457             | 0        1   ...        0         s_2/S |
2458             |              ...                        |
2459             | 0        ...            1     s_{n-1}/S |
2460             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2461 
2462             with S = \sum_{i=1}^n s_i^2
2463             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2464                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2465 
2466           - QR decomposition of constraints otherwise
2467     */
2468     if (qr_needed) {
2469       /* space to store Q */
2470       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2471       /* first we issue queries for optimal work */
2472       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2473       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2474       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2475       lqr_work = -1;
2476       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2477       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2478       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2479       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2480       lgqr_work = -1;
2481       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2482       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2483       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2484       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2485       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2486       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2487       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2488       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2489       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2490       /* array to store scaling factors for reflectors */
2491       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2492       /* array to store rhs and solution of triangular solver */
2493       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2494       /* allocating workspace for check */
2495       if (pcbddc->dbg_flag) {
2496         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2497       }
2498     }
2499     /* array to store whether a node is primal or not */
2500     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2501     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2502     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2503     if (i != total_primal_vertices) {
2504       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2505     }
2506     for (i=0;i<total_primal_vertices;i++) {
2507       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2508     }
2509     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2510 
2511     /* loop on constraints and see whether or not they need a change of basis and compute it */
2512     /* -> using implicit ordering contained in temp_indices data */
2513     total_counts = pcbddc->n_vertices;
2514     primal_counter = total_counts;
2515     while (total_counts<pcbddc->local_primal_size) {
2516       primal_dofs = 1;
2517       if (PetscBTLookup(change_basis,total_counts)) {
2518         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2519         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2520           primal_dofs++;
2521         }
2522         /* get constraint info */
2523         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2524         dual_dofs = size_of_constraint-primal_dofs;
2525 
2526         if (pcbddc->dbg_flag) {
2527           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2528         }
2529 
2530         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2531 
2532           /* copy quadrature constraints for change of basis check */
2533           if (pcbddc->dbg_flag) {
2534             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2535           }
2536           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2537           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2538 
2539           /* compute QR decomposition of constraints */
2540           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2541           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2542           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2543           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2544           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2545           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2546           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2547 
2548           /* explictly compute R^-T */
2549           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2550           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2551           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2552           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2553           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2554           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2555           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2556           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2557           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2558           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2559 
2560           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2561           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2562           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2563           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2564           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2565           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2566           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2567           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2568           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2569 
2570           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2571              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2572              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2573           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2574           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2575           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2576           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2577           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2578           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2579           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2580           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2581           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2582           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2583 
2584           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2585           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2586           /* insert cols for primal dofs */
2587           for (j=0;j<primal_dofs;j++) {
2588             start_vals = &qr_basis[j*size_of_constraint];
2589             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2590             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2591           }
2592           /* insert cols for dual dofs */
2593           for (j=0,k=0;j<dual_dofs;k++) {
2594             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2595               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2596               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2597               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2598               j++;
2599             }
2600           }
2601 
2602           /* check change of basis */
2603           if (pcbddc->dbg_flag) {
2604             PetscInt   ii,jj;
2605             PetscBool valid_qr=PETSC_TRUE;
2606             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2607             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2608             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2609             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2610             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2611             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2612             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2613             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2614             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2615             for (jj=0;jj<size_of_constraint;jj++) {
2616               for (ii=0;ii<primal_dofs;ii++) {
2617                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2618                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2619               }
2620             }
2621             if (!valid_qr) {
2622               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2623               for (jj=0;jj<size_of_constraint;jj++) {
2624                 for (ii=0;ii<primal_dofs;ii++) {
2625                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2626                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2627                   }
2628                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2629                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2630                   }
2631                 }
2632               }
2633             } else {
2634               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2635             }
2636           }
2637         } else { /* simple transformation block */
2638           PetscInt    row,col;
2639           PetscScalar val,norm;
2640 
2641           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2642           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2643           for (j=0;j<size_of_constraint;j++) {
2644             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2645             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2646             if (!PetscBTLookup(is_primal,row_B)) {
2647               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2648               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2649               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2650             } else {
2651               for (k=0;k<size_of_constraint;k++) {
2652                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2653                 if (row != col) {
2654                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2655                 } else {
2656                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2657                 }
2658                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2659               }
2660             }
2661           }
2662           if (pcbddc->dbg_flag) {
2663             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2664           }
2665         }
2666         /* increment primal counter */
2667         primal_counter += primal_dofs;
2668       } else {
2669         if (pcbddc->dbg_flag) {
2670           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2671         }
2672       }
2673       /* increment constraint counter total_counts */
2674       total_counts += primal_dofs;
2675     }
2676 
2677     /* free workspace */
2678     if (qr_needed) {
2679       if (pcbddc->dbg_flag) {
2680         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2681       }
2682       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2683       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2684       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2685       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2686       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2687     }
2688     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2689     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2690     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2691 
2692     /* assembling of global change of variable */
2693     {
2694       Mat      tmat;
2695       PetscInt bs;
2696 
2697       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2698       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2699       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2700       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2701       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2702       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2703       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2704       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2705       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2706       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2707       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2708       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2709       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2710       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2711       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2712       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2713       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2714       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2715     }
2716     /* check */
2717     if (pcbddc->dbg_flag) {
2718       PetscReal error;
2719       Vec       x,x_change;
2720 
2721       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2722       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2723       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2724       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2725       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2726       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2727       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2728       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2729       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2730       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2731       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2732       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2733       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2734       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2735       ierr = VecDestroy(&x);CHKERRQ(ierr);
2736       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2737     }
2738 
2739     /* adapt sub_schurs computed (if any) */
2740     if (pcbddc->use_deluxe_scaling) {
2741       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2742       if (sub_schurs->n_subs_par_g) {
2743         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2744       }
2745       if (sub_schurs->S_Ej_all) {
2746         Mat S_1,S_2,tmat;
2747         IS is_all_N;
2748 
2749         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2750         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2751         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2752         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2753         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2754         sub_schurs->S_Ej_all = S_1;
2755         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2756         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2757         sub_schurs->sum_S_Ej_all = S_2;
2758         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2759       }
2760     }
2761     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2762   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2763     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2764     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2765   }
2766 
2767   /* set up change of basis context */
2768   if (pcbddc->ChangeOfBasisMatrix) {
2769     PCBDDCChange_ctx change_ctx;
2770 
2771     if (!pcbddc->new_global_mat) {
2772       PetscInt global_size,local_size;
2773 
2774       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2775       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2776       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2777       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2778       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2779       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2780       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2781       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2782       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2783     } else {
2784       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2785       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2786       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2787     }
2788     if (!pcbddc->user_ChangeOfBasisMatrix) {
2789       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2790       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2791     } else {
2792       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2793       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2794     }
2795     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2796     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2797     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2798     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2799   }
2800 
2801   /* get indices in local ordering for vertices and constraints */
2802   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2803     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2804     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2805   }
2806   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2807   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2808   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2809   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2810   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2811   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2812   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2813   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2814   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2815   /* set quantities in PCBDDC data struct */
2816   pcbddc->n_actual_vertices = i;
2817   /* check if a new primal space has been introduced */
2818   pcbddc->new_primal_space_local = PETSC_TRUE;
2819   if (olocal_primal_size == pcbddc->local_primal_size) {
2820     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2821     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2822     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2823   }
2824   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2825   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2826 
2827   /* flush dbg viewer */
2828   if (pcbddc->dbg_flag) {
2829     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2830   }
2831 
2832   /* free workspace */
2833   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2834   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2835   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2836   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2837   if (!pcbddc->adaptive_selection) {
2838     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2839     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2840   } else {
2841     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2842                       pcbddc->adaptive_constraints_ptrs,
2843                       pcbddc->adaptive_constraints_idxs,
2844                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2845     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2846   }
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 #undef __FUNCT__
2851 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2852 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2853 {
2854   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2855   PC_IS       *pcis = (PC_IS*)pc->data;
2856   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2857   PetscInt    ierr,i,vertex_size;
2858   PetscViewer viewer=pcbddc->dbg_viewer;
2859 
2860   PetscFunctionBegin;
2861   /* Reset previously computed graph */
2862   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2863   /* Init local Graph struct */
2864   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2865 
2866   /* Check validity of the csr graph passed in by the user */
2867   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2868     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2869   }
2870 
2871   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2872   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2873     PetscInt  *xadj,*adjncy;
2874     PetscInt  nvtxs;
2875     PetscBool flg_row=PETSC_FALSE;
2876 
2877     if (pcbddc->use_local_adj) {
2878 
2879       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2880       if (flg_row) {
2881         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2882         pcbddc->computed_rowadj = PETSC_TRUE;
2883       }
2884       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2885     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2886       IS                     is_dummy;
2887       ISLocalToGlobalMapping l2gmap_dummy;
2888       PetscInt               j,sum;
2889       PetscInt               *cxadj,*cadjncy;
2890       const PetscInt         *idxs;
2891       PCBDDCGraph            graph;
2892       PetscBT                is_on_boundary;
2893 
2894       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2895       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2896       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2897       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2898       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2899       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2900       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2901       if (flg_row) {
2902         graph->xadj = xadj;
2903         graph->adjncy = adjncy;
2904       }
2905       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2906       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2907       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2908 
2909       if (pcbddc->dbg_flag) {
2910         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2911         for (i=0;i<graph->ncc;i++) {
2912           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2913         }
2914         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2915       }
2916 
2917       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2918       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2919       for (i=0;i<pcis->n_B;i++) {
2920         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2921       }
2922       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2923 
2924       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2925       sum = 0;
2926       for (i=0;i<graph->ncc;i++) {
2927         PetscInt sizecc = 0;
2928         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2929           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2930             sizecc++;
2931           }
2932         }
2933         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2934           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2935             cxadj[graph->queue[j]] = sizecc;
2936           }
2937         }
2938         sum += sizecc*sizecc;
2939       }
2940       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2941       sum = 0;
2942       for (i=0;i<pcis->n;i++) {
2943         PetscInt temp = cxadj[i];
2944         cxadj[i] = sum;
2945         sum += temp;
2946       }
2947       cxadj[pcis->n] = sum;
2948       for (i=0;i<graph->ncc;i++) {
2949         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2950           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2951             PetscInt k,sizecc = 0;
2952             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2953               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2954                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2955                 sizecc++;
2956               }
2957             }
2958           }
2959         }
2960       }
2961       if (pcis->n) {
2962         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2963       } else {
2964         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2965         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2966       }
2967       graph->xadj = 0;
2968       graph->adjncy = 0;
2969       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2970       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2971     }
2972   }
2973 
2974   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2975   vertex_size = 1;
2976   if (pcbddc->user_provided_isfordofs) {
2977     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2978       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2979       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2980         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2981         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2982       }
2983       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2984       pcbddc->n_ISForDofs = 0;
2985       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2986     }
2987     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2988     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2989   } else {
2990     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2991       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2992       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2993       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2994         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2995       }
2996     }
2997   }
2998 
2999   /* Setup of Graph */
3000   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3001     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3002   }
3003   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3004     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3005   }
3006   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
3007 
3008   /* Graph's connected components analysis */
3009   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3010 
3011   /* print some info to stdout */
3012   if (pcbddc->dbg_flag) {
3013     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
3014   }
3015 
3016   /* mark topography has done */
3017   pcbddc->recompute_topography = PETSC_FALSE;
3018   PetscFunctionReturn(0);
3019 }
3020 
3021 #undef __FUNCT__
3022 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
3023 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
3024 {
3025   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3026   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
3027   PetscErrorCode ierr;
3028 
3029   PetscFunctionBegin;
3030   n = 0;
3031   vertices = 0;
3032   if (pcbddc->ConstraintMatrix) {
3033     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
3034     for (i=0;i<local_primal_size;i++) {
3035       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3036       if (size_of_constraint == 1) n++;
3037       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3038     }
3039     if (vertices_idx) {
3040       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3041       n = 0;
3042       for (i=0;i<local_primal_size;i++) {
3043         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3044         if (size_of_constraint == 1) {
3045           vertices[n++]=row_cmat_indices[0];
3046         }
3047         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3048       }
3049     }
3050   }
3051   *n_vertices = n;
3052   if (vertices_idx) *vertices_idx = vertices;
3053   PetscFunctionReturn(0);
3054 }
3055 
3056 #undef __FUNCT__
3057 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3058 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3059 {
3060   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3061   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3062   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3063   PetscBT        touched;
3064   PetscErrorCode ierr;
3065 
3066     /* This function assumes that the number of local constraints per connected component
3067        is not greater than the number of nodes defined for the connected component
3068        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3069   PetscFunctionBegin;
3070   n = 0;
3071   constraints_index = 0;
3072   if (pcbddc->ConstraintMatrix) {
3073     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3074     max_size_of_constraint = 0;
3075     for (i=0;i<local_primal_size;i++) {
3076       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3077       if (size_of_constraint > 1) {
3078         n++;
3079       }
3080       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3081       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3082     }
3083     if (constraints_idx) {
3084       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3085       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3086       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3087       n = 0;
3088       for (i=0;i<local_primal_size;i++) {
3089         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3090         if (size_of_constraint > 1) {
3091           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3092           /* find first untouched local node */
3093           j = 0;
3094           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3095           min_index = row_cmat_global_indices[j];
3096           min_loc = j;
3097           /* search the minimum among nodes not yet touched on the connected component
3098              since there can be more than one constraint on a single cc */
3099           for (j=1;j<size_of_constraint;j++) {
3100             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3101               min_index = row_cmat_global_indices[j];
3102               min_loc = j;
3103             }
3104           }
3105           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3106           constraints_index[n++] = row_cmat_indices[min_loc];
3107         }
3108         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3109       }
3110       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3111       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3112     }
3113   }
3114   *n_constraints = n;
3115   if (constraints_idx) *constraints_idx = constraints_index;
3116   PetscFunctionReturn(0);
3117 }
3118 
3119 #undef __FUNCT__
3120 #define __FUNCT__ "PCBDDCSubsetNumbering"
3121 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3122 {
3123   Vec            local_vec,global_vec;
3124   IS             seqis,paris;
3125   VecScatter     scatter_ctx;
3126   PetscScalar    *array;
3127   PetscInt       *temp_global_dofs;
3128   PetscScalar    globalsum;
3129   PetscInt       i,j,s;
3130   PetscInt       nlocals,first_index,old_index,max_local;
3131   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3132   PetscMPIInt    *dof_sizes,*dof_displs;
3133   PetscBool      first_found;
3134   PetscErrorCode ierr;
3135 
3136   PetscFunctionBegin;
3137   /* mpi buffers */
3138   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3139   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3140   j = ( !rank_prec_comm ? size_prec_comm : 0);
3141   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3142   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3143   /* get maximum size of subset */
3144   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3145   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3146   max_local = 0;
3147   for (i=0;i<n_local_dofs;i++) {
3148     if (max_local < temp_global_dofs[i] ) {
3149       max_local = temp_global_dofs[i];
3150     }
3151   }
3152   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3153   max_global++;
3154   max_local = 0;
3155   for (i=0;i<n_local_dofs;i++) {
3156     if (max_local < local_dofs[i] ) {
3157       max_local = local_dofs[i];
3158     }
3159   }
3160   max_local++;
3161   /* allocate workspace */
3162   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3163   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3164   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3165   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3166   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3167   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3168   /* create scatter */
3169   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3170   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3171   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3172   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3173   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3174   /* init array */
3175   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3176   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3177   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3178   if (local_dofs_mult) {
3179     for (i=0;i<n_local_dofs;i++) {
3180       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3181     }
3182   } else {
3183     for (i=0;i<n_local_dofs;i++) {
3184       array[local_dofs[i]]=1.0;
3185     }
3186   }
3187   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3188   /* scatter into global vec and get total number of global dofs */
3189   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3190   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3191   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3192   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3193   /* Fill global_vec with cumulative function for global numbering */
3194   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3195   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3196   nlocals = 0;
3197   first_index = -1;
3198   first_found = PETSC_FALSE;
3199   for (i=0;i<s;i++) {
3200     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3201       first_found = PETSC_TRUE;
3202       first_index = i;
3203     }
3204     nlocals += (PetscInt)PetscRealPart(array[i]);
3205   }
3206   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3207   if (!rank_prec_comm) {
3208     dof_displs[0]=0;
3209     for (i=1;i<size_prec_comm;i++) {
3210       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3211     }
3212   }
3213   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3214   if (first_found) {
3215     array[first_index] += (PetscScalar)nlocals;
3216     old_index = first_index;
3217     for (i=first_index+1;i<s;i++) {
3218       if (PetscRealPart(array[i]) > 0.1) {
3219         array[i] += array[old_index];
3220         old_index = i;
3221       }
3222     }
3223   }
3224   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3225   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3226   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3227   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3228   /* get global ordering of local dofs */
3229   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3230   if (local_dofs_mult) {
3231     for (i=0;i<n_local_dofs;i++) {
3232       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3233     }
3234   } else {
3235     for (i=0;i<n_local_dofs;i++) {
3236       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3237     }
3238   }
3239   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3240   /* free workspace */
3241   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3242   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3243   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3244   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3245   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3246   /* return pointer to global ordering of local dofs */
3247   *global_numbering_subset = temp_global_dofs;
3248   PetscFunctionReturn(0);
3249 }
3250 
3251 #undef __FUNCT__
3252 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3253 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3254 {
3255   PetscInt       i,j;
3256   PetscScalar    *alphas;
3257   PetscErrorCode ierr;
3258 
3259   PetscFunctionBegin;
3260   /* this implements stabilized Gram-Schmidt */
3261   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3262   for (i=0;i<n;i++) {
3263     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3264     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3265     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3266   }
3267   ierr = PetscFree(alphas);CHKERRQ(ierr);
3268   PetscFunctionReturn(0);
3269 }
3270 
3271 #undef __FUNCT__
3272 #define __FUNCT__ "MatISGetSubassemblingPattern"
3273 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3274 {
3275   Mat             subdomain_adj;
3276   IS              new_ranks,ranks_send_to;
3277   MatPartitioning partitioner;
3278   Mat_IS          *matis;
3279   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3280   PetscInt        prank;
3281   PetscMPIInt     size,rank,color;
3282   PetscInt        *xadj,*adjncy,*oldranks;
3283   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3284   PetscInt        i,local_size,threshold=0;
3285   PetscErrorCode  ierr;
3286   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3287   PetscSubcomm    subcomm;
3288 
3289   PetscFunctionBegin;
3290   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3291   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3292   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3293 
3294   /* Get info on mapping */
3295   matis = (Mat_IS*)(mat->data);
3296   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3297   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3298 
3299   /* build local CSR graph of subdomains' connectivity */
3300   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3301   xadj[0] = 0;
3302   xadj[1] = PetscMax(n_neighs-1,0);
3303   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3304   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3305 
3306   if (threshold) {
3307     PetscInt xadj_count = 0;
3308     for (i=1;i<n_neighs;i++) {
3309       if (n_shared[i] > threshold) {
3310         adjncy[xadj_count] = neighs[i];
3311         adjncy_wgt[xadj_count] = n_shared[i];
3312         xadj_count++;
3313       }
3314     }
3315     xadj[1] = xadj_count;
3316   } else {
3317     if (xadj[1]) {
3318       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3319       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3320     }
3321   }
3322   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3323   if (use_square) {
3324     for (i=0;i<xadj[1];i++) {
3325       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3326     }
3327   }
3328   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3329 
3330   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3331 
3332   /*
3333     Restrict work on active processes only.
3334   */
3335   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3336   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3337   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3338   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3339   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3340   if (color) {
3341     ierr = PetscFree(xadj);CHKERRQ(ierr);
3342     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3343     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3344   } else {
3345     PetscInt coarsening_ratio;
3346     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3347     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3348     prank = rank;
3349     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3350     /*
3351     for (i=0;i<size;i++) {
3352       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3353     }
3354     */
3355     for (i=0;i<xadj[1];i++) {
3356       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3357     }
3358     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3359     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3360     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3361 
3362     /* Partition */
3363     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3364     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3365     if (use_vwgt) {
3366       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3367       v_wgt[0] = local_size;
3368       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3369     }
3370     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3371     coarsening_ratio = size/n_subdomains;
3372     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3373     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3374     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3375     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3376 
3377     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3378     if (contiguous) {
3379       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3380     } else {
3381       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3382     }
3383     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3384     /* clean up */
3385     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3386     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3387     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3388     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3389   }
3390   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3391 
3392   /* assemble parallel IS for sends */
3393   i = 1;
3394   if (color) i=0;
3395   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3396 
3397   /* get back IS */
3398   *is_sends = ranks_send_to;
3399   PetscFunctionReturn(0);
3400 }
3401 
3402 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3403 
3404 #undef __FUNCT__
3405 #define __FUNCT__ "MatISSubassemble"
3406 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3407 {
3408   Mat                    local_mat;
3409   Mat_IS                 *matis;
3410   IS                     is_sends_internal;
3411   PetscInt               rows,cols,new_local_rows;
3412   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3413   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3414   ISLocalToGlobalMapping l2gmap;
3415   PetscInt*              l2gmap_indices;
3416   const PetscInt*        is_indices;
3417   MatType                new_local_type;
3418   /* buffers */
3419   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3420   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3421   PetscInt               *recv_buffer_idxs_local;
3422   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3423   /* MPI */
3424   MPI_Comm               comm,comm_n;
3425   PetscSubcomm           subcomm;
3426   PetscMPIInt            n_sends,n_recvs,commsize;
3427   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3428   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3429   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3430   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3431   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3432   PetscErrorCode         ierr;
3433 
3434   PetscFunctionBegin;
3435   /* TODO: add missing checks */
3436   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3437   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3438   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3439   PetscValidLogicalCollectiveInt(mat,nis,7);
3440   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3441   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3442   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3443   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3444   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3445   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3446   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3447   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3448     PetscInt mrows,mcols,mnrows,mncols;
3449     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3450     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3451     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3452     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3453     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3454     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3455   }
3456   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3457   PetscValidLogicalCollectiveInt(mat,bs,0);
3458   /* prepare IS for sending if not provided */
3459   if (!is_sends) {
3460     PetscBool pcontig = PETSC_TRUE;
3461     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3462     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3463   } else {
3464     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3465     is_sends_internal = is_sends;
3466   }
3467 
3468   /* get pointer of MATIS data */
3469   matis = (Mat_IS*)mat->data;
3470 
3471   /* get comm */
3472   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3473 
3474   /* compute number of sends */
3475   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3476   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3477 
3478   /* compute number of receives */
3479   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3480   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3481   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3482   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3483   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3484   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3485   ierr = PetscFree(iflags);CHKERRQ(ierr);
3486 
3487   /* restrict comm if requested */
3488   subcomm = 0;
3489   destroy_mat = PETSC_FALSE;
3490   if (restrict_comm) {
3491     PetscMPIInt color,subcommsize;
3492 
3493     color = 0;
3494     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3495     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3496     subcommsize = commsize - subcommsize;
3497     /* check if reuse has been requested */
3498     if (reuse == MAT_REUSE_MATRIX) {
3499       if (*mat_n) {
3500         PetscMPIInt subcommsize2;
3501         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3502         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3503         comm_n = PetscObjectComm((PetscObject)*mat_n);
3504       } else {
3505         comm_n = PETSC_COMM_SELF;
3506       }
3507     } else { /* MAT_INITIAL_MATRIX */
3508       PetscMPIInt rank;
3509 
3510       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3511       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3512       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3513       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3514       comm_n = PetscSubcommChild(subcomm);
3515     }
3516     /* flag to destroy *mat_n if not significative */
3517     if (color) destroy_mat = PETSC_TRUE;
3518   } else {
3519     comm_n = comm;
3520   }
3521 
3522   /* prepare send/receive buffers */
3523   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3524   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3525   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3526   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3527   if (nis) {
3528     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3529   }
3530 
3531   /* Get data from local matrices */
3532   if (!isdense) {
3533     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3534     /* TODO: See below some guidelines on how to prepare the local buffers */
3535     /*
3536        send_buffer_vals should contain the raw values of the local matrix
3537        send_buffer_idxs should contain:
3538        - MatType_PRIVATE type
3539        - PetscInt        size_of_l2gmap
3540        - PetscInt        global_row_indices[size_of_l2gmap]
3541        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3542     */
3543   } else {
3544     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3545     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3546     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3547     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3548     send_buffer_idxs[1] = i;
3549     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3550     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3551     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3552     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3553     for (i=0;i<n_sends;i++) {
3554       ilengths_vals[is_indices[i]] = len*len;
3555       ilengths_idxs[is_indices[i]] = len+2;
3556     }
3557   }
3558   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3559   /* additional is (if any) */
3560   if (nis) {
3561     PetscMPIInt psum;
3562     PetscInt j;
3563     for (j=0,psum=0;j<nis;j++) {
3564       PetscInt plen;
3565       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3566       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3567       psum += len+1; /* indices + lenght */
3568     }
3569     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3570     for (j=0,psum=0;j<nis;j++) {
3571       PetscInt plen;
3572       const PetscInt *is_array_idxs;
3573       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3574       send_buffer_idxs_is[psum] = plen;
3575       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3576       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3577       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3578       psum += plen+1; /* indices + lenght */
3579     }
3580     for (i=0;i<n_sends;i++) {
3581       ilengths_idxs_is[is_indices[i]] = psum;
3582     }
3583     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3584   }
3585 
3586   buf_size_idxs = 0;
3587   buf_size_vals = 0;
3588   buf_size_idxs_is = 0;
3589   for (i=0;i<n_recvs;i++) {
3590     buf_size_idxs += (PetscInt)olengths_idxs[i];
3591     buf_size_vals += (PetscInt)olengths_vals[i];
3592     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3593   }
3594   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3595   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3596   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3597 
3598   /* get new tags for clean communications */
3599   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3600   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3601   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3602 
3603   /* allocate for requests */
3604   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3605   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3606   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3607   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3608   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3609   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3610 
3611   /* communications */
3612   ptr_idxs = recv_buffer_idxs;
3613   ptr_vals = recv_buffer_vals;
3614   ptr_idxs_is = recv_buffer_idxs_is;
3615   for (i=0;i<n_recvs;i++) {
3616     source_dest = onodes[i];
3617     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3618     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3619     ptr_idxs += olengths_idxs[i];
3620     ptr_vals += olengths_vals[i];
3621     if (nis) {
3622       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3623       ptr_idxs_is += olengths_idxs_is[i];
3624     }
3625   }
3626   for (i=0;i<n_sends;i++) {
3627     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3628     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3629     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3630     if (nis) {
3631       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3632     }
3633   }
3634   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3635   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3636 
3637   /* assemble new l2g map */
3638   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3639   ptr_idxs = recv_buffer_idxs;
3640   new_local_rows = 0;
3641   for (i=0;i<n_recvs;i++) {
3642     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3643     ptr_idxs += olengths_idxs[i];
3644   }
3645   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3646   ptr_idxs = recv_buffer_idxs;
3647   new_local_rows = 0;
3648   for (i=0;i<n_recvs;i++) {
3649     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3650     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3651     ptr_idxs += olengths_idxs[i];
3652   }
3653   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3654   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3655   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3656 
3657   /* infer new local matrix type from received local matrices type */
3658   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3659   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3660   if (n_recvs) {
3661     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3662     ptr_idxs = recv_buffer_idxs;
3663     for (i=0;i<n_recvs;i++) {
3664       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3665         new_local_type_private = MATAIJ_PRIVATE;
3666         break;
3667       }
3668       ptr_idxs += olengths_idxs[i];
3669     }
3670     switch (new_local_type_private) {
3671       case MATDENSE_PRIVATE:
3672         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3673           new_local_type = MATSEQAIJ;
3674           bs = 1;
3675         } else { /* if I receive only 1 dense matrix */
3676           new_local_type = MATSEQDENSE;
3677           bs = 1;
3678         }
3679         break;
3680       case MATAIJ_PRIVATE:
3681         new_local_type = MATSEQAIJ;
3682         bs = 1;
3683         break;
3684       case MATBAIJ_PRIVATE:
3685         new_local_type = MATSEQBAIJ;
3686         break;
3687       case MATSBAIJ_PRIVATE:
3688         new_local_type = MATSEQSBAIJ;
3689         break;
3690       default:
3691         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3692         break;
3693     }
3694   } else { /* by default, new_local_type is seqdense */
3695     new_local_type = MATSEQDENSE;
3696     bs = 1;
3697   }
3698 
3699   /* create MATIS object if needed */
3700   if (reuse == MAT_INITIAL_MATRIX) {
3701     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3702     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3703   } else {
3704     /* it also destroys the local matrices */
3705     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3706   }
3707   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3708   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3709 
3710   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3711 
3712   /* Global to local map of received indices */
3713   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3714   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3715   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3716 
3717   /* restore attributes -> type of incoming data and its size */
3718   buf_size_idxs = 0;
3719   for (i=0;i<n_recvs;i++) {
3720     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3721     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3722     buf_size_idxs += (PetscInt)olengths_idxs[i];
3723   }
3724   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3725 
3726   /* set preallocation */
3727   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3728   if (!newisdense) {
3729     PetscInt *new_local_nnz=0;
3730 
3731     ptr_vals = recv_buffer_vals;
3732     ptr_idxs = recv_buffer_idxs_local;
3733     if (n_recvs) {
3734       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3735     }
3736     for (i=0;i<n_recvs;i++) {
3737       PetscInt j;
3738       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3739         for (j=0;j<*(ptr_idxs+1);j++) {
3740           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3741         }
3742       } else {
3743         /* TODO */
3744       }
3745       ptr_idxs += olengths_idxs[i];
3746     }
3747     if (new_local_nnz) {
3748       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3749       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3750       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3751       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3752       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3753       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3754     } else {
3755       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3756     }
3757     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3758   } else {
3759     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3760   }
3761 
3762   /* set values */
3763   ptr_vals = recv_buffer_vals;
3764   ptr_idxs = recv_buffer_idxs_local;
3765   for (i=0;i<n_recvs;i++) {
3766     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3767       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3768       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3769       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3770       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3771       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3772     } else {
3773       /* TODO */
3774     }
3775     ptr_idxs += olengths_idxs[i];
3776     ptr_vals += olengths_vals[i];
3777   }
3778   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3779   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3780   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3781   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3782   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3783   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3784 
3785 #if 0
3786   if (!restrict_comm) { /* check */
3787     Vec       lvec,rvec;
3788     PetscReal infty_error;
3789 
3790     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3791     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3792     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3793     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3794     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3795     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3796     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3797     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3798     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3799   }
3800 #endif
3801 
3802   /* assemble new additional is (if any) */
3803   if (nis) {
3804     PetscInt **temp_idxs,*count_is,j,psum;
3805 
3806     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3807     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3808     ptr_idxs = recv_buffer_idxs_is;
3809     psum = 0;
3810     for (i=0;i<n_recvs;i++) {
3811       for (j=0;j<nis;j++) {
3812         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3813         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3814         psum += plen;
3815         ptr_idxs += plen+1; /* shift pointer to received data */
3816       }
3817     }
3818     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3819     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3820     for (i=1;i<nis;i++) {
3821       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3822     }
3823     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3824     ptr_idxs = recv_buffer_idxs_is;
3825     for (i=0;i<n_recvs;i++) {
3826       for (j=0;j<nis;j++) {
3827         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3828         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3829         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3830         ptr_idxs += plen+1; /* shift pointer to received data */
3831       }
3832     }
3833     for (i=0;i<nis;i++) {
3834       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3835       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3836       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3837     }
3838     ierr = PetscFree(count_is);CHKERRQ(ierr);
3839     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3840     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3841   }
3842   /* free workspace */
3843   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3844   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3845   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3846   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3847   if (isdense) {
3848     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3849     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3850   } else {
3851     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3852   }
3853   if (nis) {
3854     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3855     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3856   }
3857   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3858   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3859   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3860   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3861   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3862   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3863   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3864   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3865   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3866   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3867   ierr = PetscFree(onodes);CHKERRQ(ierr);
3868   if (nis) {
3869     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3870     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3871     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3872   }
3873   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3874   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3875     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3876     for (i=0;i<nis;i++) {
3877       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3878     }
3879   }
3880   PetscFunctionReturn(0);
3881 }
3882 
3883 /* temporary hack into ksp private data structure */
3884 #include <petsc-private/kspimpl.h>
3885 
3886 #undef __FUNCT__
3887 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3888 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3889 {
3890   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3891   PC_IS                  *pcis = (PC_IS*)pc->data;
3892   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3893   MatNullSpace           CoarseNullSpace=NULL;
3894   ISLocalToGlobalMapping coarse_islg;
3895   IS                     coarse_is,*isarray;
3896   PetscInt               i,im_active=-1,active_procs=-1;
3897   PetscInt               nis,nisdofs,nisneu;
3898   PC                     pc_temp;
3899   PCType                 coarse_pc_type;
3900   KSPType                coarse_ksp_type;
3901   PetscBool              multilevel_requested,multilevel_allowed;
3902   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3903   Mat                    t_coarse_mat_is;
3904   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3905   PetscMPIInt            all_procs;
3906   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3907   PetscBool              compute_vecs = PETSC_FALSE;
3908   PetscScalar            *array;
3909   PetscErrorCode         ierr;
3910 
3911   PetscFunctionBegin;
3912   /* Assign global numbering to coarse dofs */
3913   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3914     PetscInt ocoarse_size;
3915     compute_vecs = PETSC_TRUE;
3916     ocoarse_size = pcbddc->coarse_size;
3917     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3918     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3919     /* see if we can avoid some work */
3920     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3921       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3922         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3923         coarse_reuse = PETSC_FALSE;
3924       } else { /* we can safely reuse already computed coarse matrix */
3925         coarse_reuse = PETSC_TRUE;
3926       }
3927     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3928       coarse_reuse = PETSC_FALSE;
3929     }
3930     /* reset any subassembling information */
3931     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3932     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3933   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3934     coarse_reuse = PETSC_TRUE;
3935   }
3936 
3937   /* count "active" (i.e. with positive local size) and "void" processes */
3938   im_active = !!(pcis->n);
3939   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3940   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3941   void_procs = all_procs-active_procs;
3942   csin_type_simple = PETSC_TRUE;
3943   redist = PETSC_FALSE;
3944   if (pcbddc->current_level && void_procs) {
3945     csin_ml = PETSC_TRUE;
3946     ncoarse_ml = void_procs;
3947     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3948     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3949       csin_ds = PETSC_TRUE;
3950       ncoarse_ds = pcbddc->redistribute_coarse;
3951       redist = PETSC_TRUE;
3952     } else {
3953       csin_ds = PETSC_TRUE;
3954       ncoarse_ds = active_procs;
3955       redist = PETSC_TRUE;
3956     }
3957   } else {
3958     csin_ml = PETSC_FALSE;
3959     ncoarse_ml = all_procs;
3960     if (void_procs) {
3961       csin_ds = PETSC_TRUE;
3962       ncoarse_ds = void_procs;
3963       csin_type_simple = PETSC_FALSE;
3964     } else {
3965       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3966         csin_ds = PETSC_TRUE;
3967         ncoarse_ds = pcbddc->redistribute_coarse;
3968         redist = PETSC_TRUE;
3969       } else {
3970         csin_ds = PETSC_FALSE;
3971         ncoarse_ds = all_procs;
3972       }
3973     }
3974   }
3975 
3976   /*
3977     test if we can go multilevel: three conditions must be satisfied:
3978     - we have not exceeded the number of levels requested
3979     - we can actually subassemble the active processes
3980     - we can find a suitable number of MPI processes where we can place the subassembled problem
3981   */
3982   multilevel_allowed = PETSC_FALSE;
3983   multilevel_requested = PETSC_FALSE;
3984   if (pcbddc->current_level < pcbddc->max_levels) {
3985     multilevel_requested = PETSC_TRUE;
3986     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3987       multilevel_allowed = PETSC_FALSE;
3988     } else {
3989       multilevel_allowed = PETSC_TRUE;
3990     }
3991   }
3992   /* determine number of process partecipating to coarse solver */
3993   if (multilevel_allowed) {
3994     ncoarse = ncoarse_ml;
3995     csin = csin_ml;
3996     redist = PETSC_FALSE;
3997   } else {
3998     ncoarse = ncoarse_ds;
3999     csin = csin_ds;
4000   }
4001 
4002   /* creates temporary l2gmap and IS for coarse indexes */
4003   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4004   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4005 
4006   /* creates temporary MATIS object for coarse matrix */
4007   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4008   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4009   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4010   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4011 #if 0
4012   {
4013     PetscViewer viewer;
4014     char filename[256];
4015     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4016     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4017     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4018     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4019     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4020   }
4021 #endif
4022   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4023   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4024   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4025   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4026   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4027 
4028   /* compute dofs splitting and neumann boundaries for coarse dofs */
4029   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4030     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4031     const PetscInt         *idxs;
4032     ISLocalToGlobalMapping tmap;
4033 
4034     /* create map between primal indices (in local representative ordering) and local primal numbering */
4035     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4036     /* allocate space for temporary storage */
4037     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4038     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4039     /* allocate for IS array */
4040     nisdofs = pcbddc->n_ISForDofsLocal;
4041     nisneu = !!pcbddc->NeumannBoundariesLocal;
4042     nis = nisdofs + nisneu;
4043     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4044     /* dofs splitting */
4045     for (i=0;i<nisdofs;i++) {
4046       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4047       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4048       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4049       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4050       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4051       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4052       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4053       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4054     }
4055     /* neumann boundaries */
4056     if (pcbddc->NeumannBoundariesLocal) {
4057       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4058       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4059       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4060       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4061       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4062       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4063       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4064       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4065     }
4066     /* free memory */
4067     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4068     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4069     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4070   } else {
4071     nis = 0;
4072     nisdofs = 0;
4073     nisneu = 0;
4074     isarray = NULL;
4075   }
4076   /* destroy no longer needed map */
4077   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4078 
4079   /* restrict on coarse candidates (if needed) */
4080   coarse_mat_is = NULL;
4081   if (csin) {
4082     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4083       if (redist) {
4084         PetscMPIInt rank;
4085         PetscInt    spc,n_spc_p1,dest[1],destsize;
4086 
4087         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4088         spc = active_procs/ncoarse;
4089         n_spc_p1 = active_procs%ncoarse;
4090         if (im_active) {
4091           destsize = 1;
4092           if (rank > n_spc_p1*(spc+1)-1) {
4093             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4094           } else {
4095             dest[0] = rank/(spc+1);
4096           }
4097         } else {
4098           destsize = 0;
4099         }
4100         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4101       } else if (csin_type_simple) {
4102         PetscMPIInt rank;
4103         PetscInt    issize,isidx;
4104 
4105         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4106         if (im_active) {
4107           issize = 1;
4108           isidx = (PetscInt)rank;
4109         } else {
4110           issize = 0;
4111           isidx = -1;
4112         }
4113         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4114       } else { /* get a suitable subassembling pattern from MATIS code */
4115         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4116       }
4117 
4118       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4119       if (!redist || ncoarse <= void_procs) {
4120         PetscInt ncoarse_cand,tissize,*nisindices;
4121         PetscInt *coarse_candidates;
4122         const PetscInt* tisindices;
4123 
4124         /* get coarse candidates' ranks in pc communicator */
4125         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4126         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4127         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4128           if (!coarse_candidates[i]) {
4129             coarse_candidates[ncoarse_cand++]=i;
4130           }
4131         }
4132         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4133 
4134 
4135         if (pcbddc->dbg_flag) {
4136           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4137           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4138           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4139           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4140           for (i=0;i<ncoarse_cand;i++) {
4141             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4142           }
4143           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4144           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4145         }
4146         /* shift the pattern on coarse candidates */
4147         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4148         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4149         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4150         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4151         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4152         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4153         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4154       }
4155       if (pcbddc->dbg_flag) {
4156         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4157         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4158         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4159         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4160       }
4161     }
4162     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4163     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4164   } else {
4165     if (pcbddc->dbg_flag) {
4166       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4167       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4168       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4169     }
4170     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4171     coarse_mat_is = t_coarse_mat_is;
4172   }
4173 
4174   /* create local to global scatters for coarse problem */
4175   if (compute_vecs) {
4176     PetscInt lrows;
4177     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4178     if (coarse_mat_is) {
4179       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4180     } else {
4181       lrows = 0;
4182     }
4183     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4184     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4185     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4186     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4187     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4188   }
4189   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4190   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4191 
4192   /* set defaults for coarse KSP and PC */
4193   if (multilevel_allowed) {
4194     coarse_ksp_type = KSPRICHARDSON;
4195     coarse_pc_type = PCBDDC;
4196   } else {
4197     coarse_ksp_type = KSPPREONLY;
4198     coarse_pc_type = PCREDUNDANT;
4199   }
4200 
4201   /* print some info if requested */
4202   if (pcbddc->dbg_flag) {
4203     if (!multilevel_allowed) {
4204       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4205       if (multilevel_requested) {
4206         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4207       } else if (pcbddc->max_levels) {
4208         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4209       }
4210       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4211     }
4212   }
4213 
4214   /* create the coarse KSP object only once with defaults */
4215   if (coarse_mat_is) {
4216     MatReuse coarse_mat_reuse;
4217     PetscViewer dbg_viewer = NULL;
4218     if (pcbddc->dbg_flag) {
4219       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4220       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4221     }
4222     if (!pcbddc->coarse_ksp) {
4223       char prefix[256],str_level[16];
4224       size_t len;
4225       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4226       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4227       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4228       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4229       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4230       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4231       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4232       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4233       /* prefix */
4234       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4235       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4236       if (!pcbddc->current_level) {
4237         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4238         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4239       } else {
4240         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4241         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4242         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4243         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4244         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4245         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4246       }
4247       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4248       /* allow user customization */
4249       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4250       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4251     }
4252 
4253     /* get some info after set from options */
4254     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4255     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4256     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4257     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4258     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4259       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4260       isbddc = PETSC_FALSE;
4261     }
4262     if (isredundant) {
4263       KSP inner_ksp;
4264       PC inner_pc;
4265       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4266       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4267       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4268     }
4269 
4270     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4271     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4272     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4273     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4274     if (nisdofs) {
4275       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4276       for (i=0;i<nisdofs;i++) {
4277         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4278       }
4279     }
4280     if (nisneu) {
4281       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4282       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4283     }
4284 
4285     /* assemble coarse matrix */
4286     if (coarse_reuse) {
4287       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4288       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4289       coarse_mat_reuse = MAT_REUSE_MATRIX;
4290     } else {
4291       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4292     }
4293     if (isbddc || isnn) {
4294       if (pcbddc->coarsening_ratio > 1) {
4295         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4296           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4297           if (pcbddc->dbg_flag) {
4298             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4299             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4300             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4301             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4302           }
4303         }
4304         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4305       } else {
4306         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4307         coarse_mat = coarse_mat_is;
4308       }
4309     } else {
4310       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4311     }
4312     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4313 
4314     /* propagate symmetry info to coarse matrix */
4315     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4316     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4317 
4318     /* set operators */
4319     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4320     if (pcbddc->dbg_flag) {
4321       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4322     }
4323   } else { /* processes non partecipating to coarse solver (if any) */
4324     coarse_mat = 0;
4325   }
4326   ierr = PetscFree(isarray);CHKERRQ(ierr);
4327 #if 0
4328   {
4329     PetscViewer viewer;
4330     char filename[256];
4331     sprintf(filename,"coarse_mat.m");
4332     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4333     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4334     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4335     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4336   }
4337 #endif
4338 
4339   /* Compute coarse null space (special handling by BDDC only) */
4340   if (pcbddc->NullSpace) {
4341     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4342   }
4343 
4344   if (pcbddc->coarse_ksp) {
4345     Vec crhs,csol;
4346     PetscBool ispreonly;
4347     if (CoarseNullSpace) {
4348       if (isbddc) {
4349         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4350       } else {
4351         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4352       }
4353     }
4354     /* setup coarse ksp */
4355     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4356     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4357     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4358     /* hack */
4359     if (!csol) {
4360       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4361     }
4362     if (!crhs) {
4363       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4364     }
4365     /* Check coarse problem if in debug mode or if solving with an iterative method */
4366     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4367     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4368       KSP       check_ksp;
4369       KSPType   check_ksp_type;
4370       PC        check_pc;
4371       Vec       check_vec,coarse_vec;
4372       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4373       PetscInt  its;
4374       PetscBool compute_eigs;
4375       PetscReal *eigs_r,*eigs_c;
4376       PetscInt  neigs;
4377       const char *prefix;
4378 
4379       /* Create ksp object suitable for estimation of extreme eigenvalues */
4380       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4381       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4382       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4383       if (ispreonly) {
4384         check_ksp_type = KSPPREONLY;
4385         compute_eigs = PETSC_FALSE;
4386       } else {
4387         check_ksp_type = KSPGMRES;
4388         compute_eigs = PETSC_TRUE;
4389       }
4390       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4391       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4392       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4393       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4394       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4395       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4396       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4397       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4398       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4399       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4400       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4401       /* create random vec */
4402       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4403       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4404       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4405       if (CoarseNullSpace) {
4406         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4407       }
4408       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4409       /* solve coarse problem */
4410       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4411       if (CoarseNullSpace) {
4412         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4413       }
4414       /* set eigenvalue estimation if preonly has not been requested */
4415       if (compute_eigs) {
4416         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4417         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4418         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4419         lambda_max = eigs_r[neigs-1];
4420         lambda_min = eigs_r[0];
4421         if (pcbddc->use_coarse_estimates) {
4422           if (lambda_max>lambda_min) {
4423             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4424             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4425           }
4426         }
4427       }
4428 
4429       /* check coarse problem residual error */
4430       if (pcbddc->dbg_flag) {
4431         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4432         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4433         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4434         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4435         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4436         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4437         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4438         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4439         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4440         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4441         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4442         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4443         if (compute_eigs) {
4444           PetscReal lambda_max_s,lambda_min_s;
4445           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4446           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4447           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4448           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4449           for (i=0;i<neigs;i++) {
4450             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4451           }
4452         }
4453         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4454         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4455       }
4456       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4457       if (compute_eigs) {
4458         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4459         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4460       }
4461     }
4462   }
4463   /* print additional info */
4464   if (pcbddc->dbg_flag) {
4465     /* waits until all processes reaches this point */
4466     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4467     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4468     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4469   }
4470 
4471   /* free memory */
4472   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4473   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4474   PetscFunctionReturn(0);
4475 }
4476 
4477 #undef __FUNCT__
4478 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4479 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4480 {
4481   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4482   PC_IS*         pcis = (PC_IS*)pc->data;
4483   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4484   PetscInt       i,coarse_size;
4485   PetscInt       *local_primal_indices;
4486   PetscErrorCode ierr;
4487 
4488   PetscFunctionBegin;
4489   /* Compute global number of coarse dofs */
4490   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4491     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4492   }
4493   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4494 
4495   /* check numbering */
4496   if (pcbddc->dbg_flag) {
4497     PetscScalar coarsesum,*array;
4498     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4499 
4500     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4501     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4502     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4503     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4504     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4505     for (i=0;i<pcbddc->local_primal_size;i++) {
4506       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4507     }
4508     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4509     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4510     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4511     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4512     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4513     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4514     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4515     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4516     for (i=0;i<pcis->n;i++) {
4517       if (array[i] == 1.0) {
4518         set_error = PETSC_TRUE;
4519         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4520       }
4521     }
4522     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4523     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4524     for (i=0;i<pcis->n;i++) {
4525       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4526     }
4527     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4528     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4529     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4530     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4531     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4532     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4533     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4534       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4535       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4536       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4537       for (i=0;i<pcbddc->local_primal_size;i++) {
4538         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4539       }
4540       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4541     }
4542     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4543     if (set_error_reduced) {
4544       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4545     }
4546   }
4547   /* get back data */
4548   *coarse_size_n = coarse_size;
4549   *local_primal_indices_n = local_primal_indices;
4550   PetscFunctionReturn(0);
4551 }
4552 
4553 #undef __FUNCT__
4554 #define __FUNCT__ "PCBDDCGlobalToLocal"
4555 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4556 {
4557   IS             localis_t;
4558   PetscInt       i,lsize,*idxs,n;
4559   PetscScalar    *vals;
4560   PetscErrorCode ierr;
4561 
4562   PetscFunctionBegin;
4563   /* get indices in local ordering exploiting local to global map */
4564   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4565   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4566   for (i=0;i<lsize;i++) vals[i] = 1.0;
4567   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4568   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4569   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4570   if (idxs) { /* multilevel guard */
4571     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4572   }
4573   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4574   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4575   ierr = PetscFree(vals);CHKERRQ(ierr);
4576   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4577   /* now compute set in local ordering */
4578   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4579   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4580   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4581   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4582   for (i=0,lsize=0;i<n;i++) {
4583     if (PetscRealPart(vals[i]) > 0.5) {
4584       lsize++;
4585     }
4586   }
4587   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4588   for (i=0,lsize=0;i<n;i++) {
4589     if (PetscRealPart(vals[i]) > 0.5) {
4590       idxs[lsize++] = i;
4591     }
4592   }
4593   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4594   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4595   *localis = localis_t;
4596   PetscFunctionReturn(0);
4597 }
4598 
4599 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4600 #undef __FUNCT__
4601 #define __FUNCT__ "PCBDDCMatMult_Private"
4602 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4603 {
4604   PCBDDCChange_ctx change_ctx;
4605   PetscErrorCode   ierr;
4606 
4607   PetscFunctionBegin;
4608   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4609   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4610   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4611   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4612   PetscFunctionReturn(0);
4613 }
4614 
4615 #undef __FUNCT__
4616 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4617 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4618 {
4619   PCBDDCChange_ctx change_ctx;
4620   PetscErrorCode   ierr;
4621 
4622   PetscFunctionBegin;
4623   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4624   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4625   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4626   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4627   PetscFunctionReturn(0);
4628 }
4629 
4630 #undef __FUNCT__
4631 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4632 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4633 {
4634   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4635   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4636   PetscInt            *used_xadj,*used_adjncy;
4637   PetscBool           free_used_adj;
4638   PetscErrorCode      ierr;
4639 
4640   PetscFunctionBegin;
4641   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4642   free_used_adj = PETSC_FALSE;
4643   if (pcbddc->sub_schurs_layers == -1) {
4644     used_xadj = NULL;
4645     used_adjncy = NULL;
4646   } else {
4647     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4648       used_xadj = pcbddc->mat_graph->xadj;
4649       used_adjncy = pcbddc->mat_graph->adjncy;
4650     } else if (pcbddc->computed_rowadj) {
4651       used_xadj = pcbddc->mat_graph->xadj;
4652       used_adjncy = pcbddc->mat_graph->adjncy;
4653     } else {
4654       PetscBool      flg_row=PETSC_FALSE;
4655       const PetscInt *xadj,*adjncy;
4656       PetscInt       nvtxs;
4657 
4658       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4659       if (flg_row) {
4660         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4661         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4662         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4663         free_used_adj = PETSC_TRUE;
4664       } else {
4665         pcbddc->sub_schurs_layers = -1;
4666         used_xadj = NULL;
4667         used_adjncy = NULL;
4668       }
4669       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4670     }
4671   }
4672   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4673 
4674   /* free adjacency */
4675   if (free_used_adj) {
4676     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4677   }
4678   PetscFunctionReturn(0);
4679 }
4680 
4681 #undef __FUNCT__
4682 #define __FUNCT__ "PCBDDCInitSubSchurs"
4683 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4684 {
4685   PC_IS               *pcis=(PC_IS*)pc->data;
4686   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4687   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4688   PCBDDCGraph         graph;
4689   Mat                 S_j;
4690   PetscErrorCode      ierr;
4691 
4692   PetscFunctionBegin;
4693   /* attach interface graph for determining subsets */
4694   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4695     IS verticesIS;
4696 
4697     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4698     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4699     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4700     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4701     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4702     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4703 /*
4704     if (pcbddc->dbg_flag) {
4705       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4706     }
4707 */
4708   } else {
4709     graph = pcbddc->mat_graph;
4710   }
4711 
4712   /* Create Schur complement matrix */
4713   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4714   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4715 
4716   /* sub_schurs init */
4717   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4718   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4719   /* free graph struct */
4720   if (pcbddc->sub_schurs_rebuild) {
4721     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4722   }
4723   PetscFunctionReturn(0);
4724 }
4725