xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision b3d85658d0c297d01fe7f278fbc260addb7330eb)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size;
76       cum2 += subset_size*n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */
112     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125                       nv+cum,&pcbddc->adaptive_constraints_idxs,
126                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
127   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129   maxneigs = 0;
130   cum = cum2 = cumarray = 0;
131   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132   pcbddc->adaptive_constraints_data_ptr[0] = 0;
133   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
134     const PetscInt *idxs;
135 
136     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
137     for (cum=0;cum<nv;cum++) {
138       pcbddc->adaptive_constraints_n[cum] = 1;
139       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140       pcbddc->adaptive_constraints_data[cum] = 1.0;
141       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143     }
144     cum2 = cum;
145     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
146   }
147 
148   if (mss) { /* multilevel */
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
150     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
151   }
152 
153   for (i=0;i<sub_schurs->n_subs;i++) {
154     PetscInt j,subset_size;
155 
156     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
157     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
158       const PetscInt *idxs;
159       PetscReal      infty = PETSC_MAX_REAL;
160       PetscInt       eigs_start = 0;
161       PetscBLASInt   B_N;
162 
163       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
164       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
165         if (sub_schurs->is_hermitian) {
166           PetscInt j;
167           for (j=0;j<subset_size;j++) {
168             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
169           }
170           for (j=0;j<subset_size;j++) {
171             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
172           }
173         } else {
174           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
175           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
176         }
177       } else {
178         S = Sarray + cumarray;
179         St = Starray + cumarray;
180       }
181 
182       /* Threshold: this is an heuristic for edges */
183       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
184       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
185 
186       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
187         PetscBLASInt B_itype = 1;
188         PetscBLASInt B_IL, B_IU;
189         PetscReal    eps = -1.0; /* dlamch? */
190         PetscInt     nmin_s;
191 
192         /* ask for eigenvalues larger than thresh */
193         if (pcbddc->dbg_flag) {
194           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
195         }
196         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
197 #if defined(PETSC_USE_COMPLEX)
198         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
199 #else
200         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
201 #endif
202         ierr = PetscFPTrapPop();CHKERRQ(ierr);
203         if (B_ierr) {
204           if (B_ierr < 0 ) {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
206           } else if (B_ierr <= B_N) {
207             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
208           } else {
209             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
210           }
211         }
212 
213         if (B_neigs > nmax) {
214           if (pcbddc->dbg_flag) {
215             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
216           }
217           eigs_start = B_neigs -nmax;
218           B_neigs = nmax;
219         }
220 
221         nmin_s = PetscMin(nmin,B_N);
222         if (B_neigs < nmin_s) {
223           PetscBLASInt B_neigs2;
224 
225           B_IU = B_N - B_neigs;
226           B_IL = B_N - nmin_s + 1;
227           if (pcbddc->dbg_flag) {
228             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
229           }
230           if (sub_schurs->is_hermitian) {
231             PetscInt j;
232             for (j=0;j<subset_size;j++) {
233               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
234             }
235             for (j=0;j<subset_size;j++) {
236               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
237             }
238           } else {
239             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
240             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
241           }
242           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
243 #if defined(PETSC_USE_COMPLEX)
244           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
245 #else
246           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
247 #endif
248           ierr = PetscFPTrapPop();CHKERRQ(ierr);
249           B_neigs += B_neigs2;
250         }
251         if (B_ierr) {
252           if (B_ierr < 0 ) {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
254           } else if (B_ierr <= B_N) {
255             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
256           } else {
257             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
258           }
259         }
260         if (pcbddc->dbg_flag) {
261           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
262           for (j=0;j<B_neigs;j++) {
263             if (eigs[j] == 0.0) {
264               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
265             } else {
266               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
267             }
268           }
269         }
270       } else {
271           /* TODO */
272       }
273 
274       maxneigs = PetscMax(B_neigs,maxneigs);
275       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
276       if (B_neigs) {
277         ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
278 
279         if (pcbddc->dbg_flag > 1) {
280           PetscInt ii;
281           for (ii=0;ii<B_neigs;ii++) {
282             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
283             for (j=0;j<B_N;j++) {
284               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
285             }
286           }
287         }
288 #if 0
289         for (j=0;j<B_neigs;j++) {
290           PetscBLASInt Blas_N,Blas_one = 1.0;
291           PetscScalar norm;
292           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
293           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
294                                                      &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
295           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
296             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
297           } else {
298             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
299           }
300           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
301         }
302 #endif
303         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
304         pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
305         pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
306         cum++;
307       }
308       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
309     }
310     /* shift for next computation */
311     cumarray += subset_size*subset_size;
312   }
313   if (pcbddc->dbg_flag) {
314     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
315   }
316 
317   if (mss) {
318     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
319     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
320     /* destroy matrices (junk) */
321     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
322     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
323   }
324   if (allocated_S_St) {
325     ierr = PetscFree2(S,St);CHKERRQ(ierr);
326   }
327   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
328 #if defined(PETSC_USE_COMPLEX)
329   ierr = PetscFree(rwork);CHKERRQ(ierr);
330 #endif
331   if (pcbddc->dbg_flag) {
332     PetscInt maxneigs_r;
333     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
334     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
335   }
336   PetscFunctionReturn(0);
337 }
338 
339 #undef __FUNCT__
340 #define __FUNCT__ "PCBDDCSetUpSolvers"
341 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
342 {
343   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
344   PetscScalar    *coarse_submat_vals;
345   PetscErrorCode ierr;
346 
347   PetscFunctionBegin;
348   /* Setup local scatters R_to_B and (optionally) R_to_D */
349   /* PCBDDCSetUpLocalWorkVectors should be called first! */
350   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
351 
352   /* Setup local neumann solver ksp_R */
353   /* PCBDDCSetUpLocalScatters should be called first! */
354   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
355 
356   /* Change global null space passed in by the user if change of basis has been requested */
357   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
358     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
359   }
360 
361   /*
362      Setup local correction and local part of coarse basis.
363      Gives back the dense local part of the coarse matrix in column major ordering
364   */
365   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
366 
367   /* Compute total number of coarse nodes and setup coarse solver */
368   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
369 
370   /* free */
371   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "PCBDDCResetCustomization"
377 PetscErrorCode PCBDDCResetCustomization(PC pc)
378 {
379   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
384   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
385   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
387   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
388   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
389   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
390   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
391   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
392   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
393   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 #undef __FUNCT__
398 #define __FUNCT__ "PCBDDCResetTopography"
399 PetscErrorCode PCBDDCResetTopography(PC pc)
400 {
401   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
402   PetscErrorCode ierr;
403 
404   PetscFunctionBegin;
405   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
406   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
407   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
408   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
409   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "PCBDDCResetSolvers"
415 PetscErrorCode PCBDDCResetSolvers(PC pc)
416 {
417   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
418   PetscScalar    *array;
419   PetscErrorCode ierr;
420 
421   PetscFunctionBegin;
422   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
423   if (pcbddc->coarse_phi_B) {
424     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
425     ierr = PetscFree(array);CHKERRQ(ierr);
426   }
427   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
428   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
429   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
430   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
431   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
432   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
434   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
436   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
437   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
440   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
443   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
444   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
446   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
447   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
448   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
449   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
455 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
456 {
457   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
458   PC_IS          *pcis = (PC_IS*)pc->data;
459   VecType        impVecType;
460   PetscInt       n_constraints,n_R,old_size;
461   PetscErrorCode ierr;
462 
463   PetscFunctionBegin;
464   if (!pcbddc->ConstraintMatrix) {
465     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
466   }
467   /* get sizes */
468   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
469   n_R = pcis->n-pcbddc->n_vertices;
470   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
471   /* local work vectors (try to avoid unneeded work)*/
472   /* R nodes */
473   old_size = -1;
474   if (pcbddc->vec1_R) {
475     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
476   }
477   if (n_R != old_size) {
478     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
480     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
481     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
482     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
483     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
484   }
485   /* local primal dofs */
486   old_size = -1;
487   if (pcbddc->vec1_P) {
488     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
489   }
490   if (pcbddc->local_primal_size != old_size) {
491     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
492     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
493     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
494     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
495   }
496   /* local explicit constraints */
497   old_size = -1;
498   if (pcbddc->vec1_C) {
499     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
500   }
501   if (n_constraints && n_constraints != old_size) {
502     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
503     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
504     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
505     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
506   }
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "PCBDDCSetUpCorrection"
512 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
513 {
514   PetscErrorCode         ierr;
515   /* pointers to pcis and pcbddc */
516   PC_IS*                 pcis = (PC_IS*)pc->data;
517   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
518   /* submatrices of local problem */
519   Mat                    A_RV,A_VR,A_VV,local_auxmat2_R;
520   /* submatrices of local coarse problem */
521   Mat                    S_VV,S_CV,S_VC,S_CC;
522   /* working matrices */
523   Mat                    C_CR;
524   /* additional working stuff */
525   PC                     pc_R;
526   Mat                    F;
527   PetscBool              isLU,isCHOL,isILU;
528 
529   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
530   PetscScalar            *work;
531   PetscInt               *idx_V_B;
532   PetscInt               n,n_vertices,n_constraints;
533   PetscInt               i,n_R,n_D,n_B;
534   PetscBool              unsymmetric_check;
535   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
536   MatType                impMatType;
537   /* some shortcuts to scalars */
538   PetscScalar            one=1.0,m_one=-1.0;
539 
540   PetscFunctionBegin;
541   n_vertices = pcbddc->n_vertices;
542   n_constraints = pcbddc->local_primal_size-n_vertices;
543   /* Set Non-overlapping dimensions */
544   n_B = pcis->n_B;
545   n_D = pcis->n - n_B;
546   n_R = pcis->n - n_vertices;
547 
548   /* Set types for local objects needed by BDDC precondtioner */
549   impMatType = MATSEQDENSE;
550 
551   /* vertices in boundary numbering */
552   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
553   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
554   if (i != n_vertices) {
555     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
556   }
557 
558   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
559   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
560   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
561   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
562   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
563   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
564   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
565   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
566   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
567   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
568 
569   unsymmetric_check = PETSC_FALSE;
570   /* allocate workspace */
571   n = 0;
572   if (n_constraints) {
573     n += n_R*n_constraints;
574   }
575   if (n_vertices) {
576     n = PetscMax(2*n_R*n_vertices,n);
577     n = PetscMax((n_R+n_B)*n_vertices,n);
578   }
579   if (!pcbddc->issym) {
580     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
581     unsymmetric_check = PETSC_TRUE;
582   }
583   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
584 
585   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
586   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
587   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
588   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
589   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
590   if (isLU || isILU || isCHOL) {
591     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
592   } else {
593     F = NULL;
594   }
595 
596   /* Precompute stuffs needed for preprocessing and application of BDDC*/
597   if (n_constraints) {
598     Mat         M1,M2,M3;
599     Mat         auxmat;
600     IS          is_aux;
601     PetscScalar *array,*array2;
602 
603     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
604     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
605 
606     /* Extract constraints on R nodes: C_{CR}  */
607     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
608     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
609     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
610 
611     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
612     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
613     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
614     for (i=0;i<n_constraints;i++) {
615       const PetscScalar *row_cmat_values;
616       const PetscInt    *row_cmat_indices;
617       PetscInt          size_of_constraint,j;
618 
619       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
620       for (j=0;j<size_of_constraint;j++) {
621         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
622       }
623       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
624     }
625     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
626     if (F) {
627       Mat B;
628 
629       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
630       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
631       ierr = MatDestroy(&B);CHKERRQ(ierr);
632     } else {
633       PetscScalar *marr;
634 
635       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
636       for (i=0;i<n_constraints;i++) {
637         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
638         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
639         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
640         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
641         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
642       }
643       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
644     }
645     if (!pcbddc->switch_static) {
646       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
647       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
648       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
649       for (i=0;i<n_constraints;i++) {
650         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
651         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
652         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
653         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
654         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
655         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
656       }
657       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
658       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
659       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
660     } else {
661       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
662       pcbddc->local_auxmat2 = local_auxmat2_R;
663       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
664     }
665     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
666     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
667     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
668     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
669     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
670     if (isCHOL) {
671       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
672     } else {
673       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
674     }
675     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
676     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
677     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
678     ierr = MatDestroy(&M2);CHKERRQ(ierr);
679     ierr = MatDestroy(&M3);CHKERRQ(ierr);
680     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
681     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
682     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
683     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
684     ierr = MatDestroy(&M1);CHKERRQ(ierr);
685   }
686   /* Get submatrices from subdomain matrix */
687   if (n_vertices) {
688     Mat       newmat;
689     IS        is_aux;
690     PetscInt  ibs,mbs;
691     PetscBool issbaij;
692 
693     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
694     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
695     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
696     if (ibs != mbs) { /* need to convert to SEQAIJ */
697       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
698       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
699       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
700       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
701       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
702       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
703     } else {
704       /* this is safe */
705       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
706       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
707       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
708         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
709         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
710         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
711         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
712         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
713         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
714       } else {
715         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
716         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
717         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
718       }
719       /* need to convert to SeqAIJ for MatMatMult with dense matrix */
720       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
721     }
722     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
723   }
724 
725   /* Matrix of coarse basis functions (local) */
726   if (pcbddc->coarse_phi_B) {
727     PetscInt on_B,on_primal,on_D=n_D;
728     if (pcbddc->coarse_phi_D) {
729       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
730     }
731     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
732     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
733       PetscScalar *marray;
734 
735       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
736       ierr = PetscFree(marray);CHKERRQ(ierr);
737       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
738       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
739       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
740       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
741     }
742   }
743 
744   if (!pcbddc->coarse_phi_B) {
745     PetscScalar *marray;
746 
747     n = n_B*pcbddc->local_primal_size;
748     if (pcbddc->switch_static || pcbddc->dbg_flag) {
749       n += n_D*pcbddc->local_primal_size;
750     }
751     if (!pcbddc->issym) {
752       n *= 2;
753     }
754     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
755     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
756     n = n_B*pcbddc->local_primal_size;
757     if (pcbddc->switch_static || pcbddc->dbg_flag) {
758       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
759       n += n_D*pcbddc->local_primal_size;
760     }
761     if (!pcbddc->issym) {
762       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
763       if (pcbddc->switch_static || pcbddc->dbg_flag) {
764         n = n_B*pcbddc->local_primal_size;
765         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
766       }
767     } else {
768       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
769       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
770       if (pcbddc->switch_static || pcbddc->dbg_flag) {
771         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
772         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
773       }
774     }
775   }
776   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
777   /* vertices */
778   if (n_vertices) {
779 
780     if (n_R) {
781       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
782       PetscBLASInt B_N,B_one = 1;
783       PetscScalar  *x,*y;
784 
785       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
786       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
787       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
788       if (F) {
789         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
790       } else {
791         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
792         for (i=0;i<n_vertices;i++) {
793           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
794           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
795           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
796           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
797           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
798         }
799         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
800       }
801       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
802       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
803       if (n_constraints) {
804         Mat B;
805 
806         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
807         for (i=0;i<n_vertices;i++) {
808           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
809           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
810           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
811           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
812           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
813           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
814         }
815         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
816         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
817         ierr = MatDestroy(&B);CHKERRQ(ierr);
818         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
819         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
820         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
821         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
822         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
823         ierr = MatDestroy(&B);CHKERRQ(ierr);
824       }
825       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
826       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
827       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
828       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
829       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
830       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
831       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
832       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
833       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
834       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
835     } else {
836       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
837     }
838     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
839     /* coarse basis functions */
840     for (i=0;i<n_vertices;i++) {
841       PetscScalar *y;
842 
843       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
844       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
845       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
846       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
847       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
848       y[n_B*i+idx_V_B[i]] = 1.0;
849       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
850       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
851 
852       if (pcbddc->switch_static || pcbddc->dbg_flag) {
853         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
854         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
855         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
856         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
857         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
858         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
859       }
860       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
861     }
862   }
863 
864   if (n_constraints) {
865     Mat B;
866 
867     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
868     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
869     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
870     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
871     if (n_vertices) {
872       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
873         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
874       } else {
875         Mat S_VCt;
876 
877         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
878         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
879         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
880       }
881     }
882     ierr = MatDestroy(&B);CHKERRQ(ierr);
883     /* coarse basis functions */
884     for (i=0;i<n_constraints;i++) {
885       PetscScalar *y;
886 
887       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
888       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
889       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
890       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
891       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
892       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
893       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
894       if (pcbddc->switch_static || pcbddc->dbg_flag) {
895         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
896         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
897         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
898         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
899         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
900         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
901       }
902       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
903     }
904   }
905   if (n_constraints) {
906     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
907   }
908 
909   /* compute other basis functions for non-symmetric problems */
910   if (!pcbddc->issym) {
911 
912     if (n_constraints) {
913       Mat S_CCT,B_C;
914 
915       /* this is a lazy thing */
916       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
917       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
918       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
919       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
920       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
921       if (n_vertices) {
922         Mat B_V,S_VCT;
923 
924         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
925         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
926         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
927         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
928         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
929       }
930       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
931     } else { /* if there are no constraints, reset work */
932       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
933     }
934     if (n_vertices && n_R) {
935       Mat          A_VRT;
936       PetscScalar  *marray;
937       PetscBLASInt B_N,B_one = 1;
938 
939       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
940       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
941       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
942       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
943       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
944       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
945       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
946     }
947 
948     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
949       for (i=0;i<pcbddc->local_primal_size;i++) {
950         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
951         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
952         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
953         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
954         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
955       }
956     } else {
957       for (i=0;i<pcbddc->local_primal_size;i++) {
958         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
959         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
960         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
961         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
962         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
963       }
964     }
965     /* coarse basis functions */
966     for (i=0;i<pcbddc->local_primal_size;i++) {
967       PetscScalar *y;
968 
969       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
970       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
971       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
972       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
973       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
974       if (i<n_vertices) {
975         y[n_B*i+idx_V_B[i]] = 1.0;
976       }
977       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
978       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
979 
980       if (pcbddc->switch_static || pcbddc->dbg_flag) {
981         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
982         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
983         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
984         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
985         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
986         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
987       }
988       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
989     }
990   }
991   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
992   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
993   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
994   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
995   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
996   /* Checking coarse_sub_mat and coarse basis functios */
997   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
998   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
999   if (pcbddc->dbg_flag) {
1000     Mat         coarse_sub_mat;
1001     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1002     Mat         coarse_phi_D,coarse_phi_B;
1003     Mat         coarse_psi_D,coarse_psi_B;
1004     Mat         A_II,A_BB,A_IB,A_BI;
1005     Mat         C_B,CPHI;
1006     IS          is_dummy;
1007     Vec         mones;
1008     MatType     checkmattype=MATSEQAIJ;
1009     PetscReal   real_value;
1010 
1011     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1012     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1013     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1014     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1015     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1016     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1017     if (unsymmetric_check) {
1018       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1019       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1020     }
1021     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1022 
1023     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1024     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
1025     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1026     if (unsymmetric_check) {
1027       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1028       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1029       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1030       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1031       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1032       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1033       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1034       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1035       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1036       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1037       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1038       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1039     } else {
1040       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1041       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1042       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1043       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1044       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1045       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1046       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1047       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1048     }
1049     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1050     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1051     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1052     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1053     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1054     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1055     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1056     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1057 
1058     /* check constraints */
1059     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1060     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1061     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1062     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1063     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1064     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1065     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1066     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1067     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1068     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1069     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1070     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1071 
1072     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1073     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1074     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1075     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1076     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1077     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1078     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1079     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1080     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1081     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1082     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1083     if (unsymmetric_check) {
1084       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1085       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1086     }
1087     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1088   }
1089   /* free memory */
1090   ierr = PetscFree(work);CHKERRQ(ierr);
1091   if (n_vertices) {
1092     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1093   }
1094   if (n_constraints) {
1095     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1096   }
1097   /* get back data */
1098   *coarse_submat_vals_n = coarse_submat_vals;
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1104 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1105 {
1106   Mat            *work_mat;
1107   IS             isrow_s,iscol_s;
1108   PetscBool      rsorted,csorted;
1109   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1114   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1115   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1116   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1117 
1118   if (!rsorted) {
1119     const PetscInt *idxs;
1120     PetscInt *idxs_sorted,i;
1121 
1122     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1123     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1124     for (i=0;i<rsize;i++) {
1125       idxs_perm_r[i] = i;
1126     }
1127     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1128     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1129     for (i=0;i<rsize;i++) {
1130       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1131     }
1132     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1133     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1134   } else {
1135     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1136     isrow_s = isrow;
1137   }
1138 
1139   if (!csorted) {
1140     if (isrow == iscol) {
1141       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1142       iscol_s = isrow_s;
1143     } else {
1144       const PetscInt *idxs;
1145       PetscInt *idxs_sorted,i;
1146 
1147       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1148       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1149       for (i=0;i<csize;i++) {
1150         idxs_perm_c[i] = i;
1151       }
1152       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1153       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1154       for (i=0;i<csize;i++) {
1155         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1156       }
1157       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1158       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1159     }
1160   } else {
1161     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1162     iscol_s = iscol;
1163   }
1164 
1165   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1166 
1167   if (!rsorted || !csorted) {
1168     Mat      new_mat;
1169     IS       is_perm_r,is_perm_c;
1170 
1171     if (!rsorted) {
1172       PetscInt *idxs_r,i;
1173       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1174       for (i=0;i<rsize;i++) {
1175         idxs_r[idxs_perm_r[i]] = i;
1176       }
1177       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1178       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1179     } else {
1180       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1181     }
1182     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1183 
1184     if (!csorted) {
1185       if (isrow_s == iscol_s) {
1186         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1187         is_perm_c = is_perm_r;
1188       } else {
1189         PetscInt *idxs_c,i;
1190         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1191         for (i=0;i<csize;i++) {
1192           idxs_c[idxs_perm_c[i]] = i;
1193         }
1194         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1195         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1196       }
1197     } else {
1198       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1199     }
1200     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1201 
1202     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1203     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1204     work_mat[0] = new_mat;
1205     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1206     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1207   }
1208 
1209   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1210   *B = work_mat[0];
1211   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1212   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1213   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 #undef __FUNCT__
1218 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1219 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1220 {
1221   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1222   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1223   Mat            new_mat;
1224   IS             is_local,is_global;
1225   PetscInt       local_size;
1226   PetscBool      isseqaij;
1227   PetscErrorCode ierr;
1228 
1229   PetscFunctionBegin;
1230   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1231   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1232   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1233   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1234   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1235   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1236   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1237 
1238   /* check */
1239   if (pcbddc->dbg_flag) {
1240     Vec       x,x_change;
1241     PetscReal error;
1242 
1243     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1244     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1245     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1246     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1247     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1248     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1249     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1250     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1251     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1252     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1253     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1254     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1255     ierr = VecDestroy(&x);CHKERRQ(ierr);
1256     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1257   }
1258 
1259   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1260   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1261   if (isseqaij) {
1262     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1263   } else {
1264     Mat work_mat;
1265     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1266     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1267     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1268   }
1269   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1270 #if !defined(PETSC_USE_COMPLEX)
1271   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1272 #endif
1273   /*
1274   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1275   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1276   */
1277   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1283 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1284 {
1285   PC_IS*         pcis = (PC_IS*)(pc->data);
1286   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1287   IS             is_aux1,is_aux2;
1288   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1289   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1290   PetscInt       vbs,bs;
1291   PetscBT        bitmask;
1292   PetscErrorCode ierr;
1293 
1294   PetscFunctionBegin;
1295   /*
1296     No need to setup local scatters if
1297       - primal space is unchanged
1298         AND
1299       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1300         AND
1301       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1302   */
1303   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1304     PetscFunctionReturn(0);
1305   }
1306   /* destroy old objects */
1307   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1308   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1309   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1310   /* Set Non-overlapping dimensions */
1311   n_B = pcis->n_B;
1312   n_D = pcis->n - n_B;
1313   n_vertices = pcbddc->n_vertices;
1314   /* create auxiliary bitmask */
1315   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1316   for (i=0;i<n_vertices;i++) {
1317     ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1318   }
1319 
1320   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1321   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1322   for (i=0, n_R=0; i<pcis->n; i++) {
1323     if (!PetscBTLookup(bitmask,i)) {
1324       idx_R_local[n_R] = i;
1325       n_R++;
1326     }
1327   }
1328 
1329   /* Block code */
1330   vbs = 1;
1331   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1332   if (bs>1 && !(n_vertices%bs)) {
1333     PetscBool is_blocked = PETSC_TRUE;
1334     PetscInt  *vary;
1335     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1336     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1337     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1338     for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1339     for (i=0; i<n_vertices/bs; i++) {
1340       if (vary[i]!=0 && vary[i]!=bs) {
1341         is_blocked = PETSC_FALSE;
1342         break;
1343       }
1344     }
1345     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1346       vbs = bs;
1347       for (i=0;i<n_R/vbs;i++) {
1348         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1349       }
1350     }
1351     ierr = PetscFree(vary);CHKERRQ(ierr);
1352   }
1353   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1354   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1355 
1356   /* print some info if requested */
1357   if (pcbddc->dbg_flag) {
1358     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1359     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1360     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1361     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1362     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1363     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1364     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1365   }
1366 
1367   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1368   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1369   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1370   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1371   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1372   for (i=0; i<n_D; i++) {
1373     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1374   }
1375   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1376   for (i=0, j=0; i<n_R; i++) {
1377     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1378       aux_array1[j++] = i;
1379     }
1380   }
1381   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1382   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1383   for (i=0, j=0; i<n_B; i++) {
1384     if (!PetscBTLookup(bitmask,is_indices[i])) {
1385       aux_array2[j++] = i;
1386     }
1387   }
1388   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1389   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1390   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1391   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1392   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1393 
1394   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1395     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1396     for (i=0, j=0; i<n_R; i++) {
1397       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1398         aux_array1[j++] = i;
1399       }
1400     }
1401     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1402     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1403     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1404   }
1405   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1406   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1413 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1414 {
1415   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1416   PC_IS          *pcis = (PC_IS*)pc->data;
1417   PC             pc_temp;
1418   Mat            A_RR;
1419   MatReuse       reuse;
1420   PetscScalar    m_one = -1.0;
1421   PetscReal      value;
1422   PetscInt       n_D,n_R,ibs,mbs;
1423   PetscBool      use_exact,use_exact_reduced,issbaij;
1424   PetscErrorCode ierr;
1425   /* prefixes stuff */
1426   char           dir_prefix[256],neu_prefix[256],str_level[16];
1427   size_t         len;
1428 
1429   PetscFunctionBegin;
1430 
1431   /* compute prefixes */
1432   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1433   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1434   if (!pcbddc->current_level) {
1435     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1436     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1437     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1438     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1439   } else {
1440     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1441     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1442     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1443     len -= 15; /* remove "pc_bddc_coarse_" */
1444     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1445     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1446     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1447     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1448     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1449     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1450     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1451     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1452   }
1453 
1454   /* DIRICHLET PROBLEM */
1455   if (dirichlet) {
1456     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1457     if (pcbddc->issym) {
1458       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1459     }
1460     /* Matrix for Dirichlet problem is pcis->A_II */
1461     n_D = pcis->n - pcis->n_B;
1462     if (!pcbddc->ksp_D) { /* create object if not yet build */
1463       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1464       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1465       /* default */
1466       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1467       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1468       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1469       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1470       if (issbaij) {
1471         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1472       } else {
1473         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1474       }
1475       /* Allow user's customization */
1476       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1477       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1478     }
1479     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1480     if (sub_schurs->interior_solver) {
1481       ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr);
1482     }
1483     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1484     if (!n_D) {
1485       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1486       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1487     }
1488     /* Set Up KSP for Dirichlet problem of BDDC */
1489     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1490     /* set ksp_D into pcis data */
1491     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1492     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1493     pcis->ksp_D = pcbddc->ksp_D;
1494   }
1495 
1496   /* NEUMANN PROBLEM */
1497   A_RR = 0;
1498   if (neumann) {
1499     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1500     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1501     if (pcbddc->ksp_R) { /* already created ksp */
1502       PetscInt nn_R;
1503       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1504       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1505       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1506       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1507         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1508         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1509         reuse = MAT_INITIAL_MATRIX;
1510       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1511         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1512           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1513           reuse = MAT_INITIAL_MATRIX;
1514         } else { /* safe to reuse the matrix */
1515           reuse = MAT_REUSE_MATRIX;
1516         }
1517       }
1518       /* last check */
1519       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1520         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1521         reuse = MAT_INITIAL_MATRIX;
1522       }
1523     } else { /* first time, so we need to create the matrix */
1524       reuse = MAT_INITIAL_MATRIX;
1525     }
1526     /* extract A_RR */
1527     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1528     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1529     if (ibs != mbs) {
1530       Mat newmat;
1531       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1532       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1533       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1534     } else {
1535       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1536     }
1537     if (pcbddc->issym) {
1538       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1539     }
1540     if (!pcbddc->ksp_R) { /* create object if not present */
1541       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1542       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1543       /* default */
1544       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1545       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1546       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1547       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1548       if (issbaij) {
1549         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1550       } else {
1551         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1552       }
1553       /* Allow user's customization */
1554       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1555       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1556     }
1557     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1558     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1559     if (!n_R) {
1560       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1561       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1562     }
1563     /* Set Up KSP for Neumann problem of BDDC */
1564     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1565   }
1566 
1567   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1568   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1569     if (pcbddc->dbg_flag) {
1570       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1571       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1572       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1573     }
1574     if (dirichlet) { /* Dirichlet */
1575       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1576       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1577       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1578       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1579       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1580       /* need to be adapted? */
1581       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1582       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1583       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1584       /* print info */
1585       if (pcbddc->dbg_flag) {
1586         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1587         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1588       }
1589       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1590         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1591       }
1592     }
1593     if (neumann) { /* Neumann */
1594       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1595       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1596       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1597       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1598       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1599       /* need to be adapted? */
1600       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1601       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1602       /* print info */
1603       if (pcbddc->dbg_flag) {
1604         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1605         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1606       }
1607       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1608         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1609       }
1610     }
1611   }
1612   /* free Neumann problem's matrix */
1613   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNCT__
1618 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1619 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1620 {
1621   PetscErrorCode ierr;
1622   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1623 
1624   PetscFunctionBegin;
1625   ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1626   if (!pcbddc->switch_static) {
1627     if (applytranspose && pcbddc->local_auxmat1) {
1628       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1629       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1630     }
1631     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1632     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1633   } else {
1634     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1635     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1636     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1637     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1638     if (applytranspose && pcbddc->local_auxmat1) {
1639       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1640       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1641       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1642       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1643     }
1644   }
1645   if (applytranspose) {
1646     ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1647   } else {
1648     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1649   }
1650   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1651   if (!pcbddc->switch_static) {
1652     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1653     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1654     if (!applytranspose && pcbddc->local_auxmat1) {
1655       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1656       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1657     }
1658   } else {
1659     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663     if (!applytranspose && pcbddc->local_auxmat1) {
1664       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1665       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1666     }
1667     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1668     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1669     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1670     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1671   }
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1676 #undef __FUNCT__
1677 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1678 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1679 {
1680   PetscErrorCode ierr;
1681   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1682   PC_IS*            pcis = (PC_IS*)  (pc->data);
1683   const PetscScalar zero = 0.0;
1684 
1685   PetscFunctionBegin;
1686   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1687   if (applytranspose) {
1688     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1689     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1690   } else {
1691     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1692     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1693   }
1694   /* start communications from local primal nodes to rhs of coarse solver */
1695   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1696   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1697   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1698 
1699   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1700   /* TODO remove null space when doing multilevel */
1701   if (pcbddc->coarse_ksp) {
1702     Vec rhs,sol;
1703 
1704     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1705     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1706     if (applytranspose) {
1707       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1708     } else {
1709       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1710     }
1711   }
1712 
1713   /* Local solution on R nodes */
1714   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1715     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1716   }
1717 
1718   /* communications from coarse sol to local primal nodes */
1719   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1720   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1721 
1722   /* Sum contributions from two levels */
1723   if (applytranspose) {
1724     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1725     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1726   } else {
1727     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1728     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1729   }
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1735 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1736 {
1737   PetscErrorCode ierr;
1738   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1739   PetscScalar    *array;
1740   Vec            from,to;
1741 
1742   PetscFunctionBegin;
1743   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1744     from = pcbddc->coarse_vec;
1745     to = pcbddc->vec1_P;
1746     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1747       Vec tvec;
1748 
1749       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1750       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1751       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1752       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1753       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1754       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1755     }
1756   } else { /* from local to global -> put data in coarse right hand side */
1757     from = pcbddc->vec1_P;
1758     to = pcbddc->coarse_vec;
1759   }
1760   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #undef __FUNCT__
1765 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1766 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1767 {
1768   PetscErrorCode ierr;
1769   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1770   PetscScalar    *array;
1771   Vec            from,to;
1772 
1773   PetscFunctionBegin;
1774   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1775     from = pcbddc->coarse_vec;
1776     to = pcbddc->vec1_P;
1777   } else { /* from local to global -> put data in coarse right hand side */
1778     from = pcbddc->vec1_P;
1779     to = pcbddc->coarse_vec;
1780   }
1781   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1782   if (smode == SCATTER_FORWARD) {
1783     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1784       Vec tvec;
1785 
1786       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1787       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1788       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1789       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1790     }
1791   } else {
1792     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1793      ierr = VecResetArray(from);CHKERRQ(ierr);
1794     }
1795   }
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 /* uncomment for testing purposes */
1800 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1801 #undef __FUNCT__
1802 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1803 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1804 {
1805   PetscErrorCode    ierr;
1806   PC_IS*            pcis = (PC_IS*)(pc->data);
1807   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1808   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1809   /* one and zero */
1810   PetscScalar       one=1.0,zero=0.0;
1811   /* space to store constraints and their local indices */
1812   PetscScalar       *constraints_data;
1813   PetscInt          *constraints_idxs,*constraints_idxs_B;
1814   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1815   PetscInt          *constraints_n;
1816   /* iterators */
1817   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1818   /* BLAS integers */
1819   PetscBLASInt      lwork,lierr;
1820   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1821   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1822   /* reuse */
1823   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1824   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1825   /* change of basis */
1826   PetscBool         qr_needed;
1827   PetscBT           change_basis,qr_needed_idx;
1828   /* auxiliary stuff */
1829   PetscInt          *nnz,*is_indices;
1830   PetscInt          ncc;
1831   /* some quantities */
1832   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1833   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1834 
1835   PetscFunctionBegin;
1836   /* Destroy Mat objects computed previously */
1837   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1838   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1839   /* save info on constraints from previous setup (if any) */
1840   olocal_primal_size = pcbddc->local_primal_size;
1841   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1842   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
1843   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1844   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1845   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1846   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1847 
1848   /* print some info */
1849   if (pcbddc->dbg_flag) {
1850     IS       vertices;
1851     PetscInt nv,nedges,nfaces;
1852     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1853     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1854     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1855     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1856     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1857     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1858     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1859     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1860     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1861   }
1862 
1863   if (!pcbddc->adaptive_selection) {
1864     IS           ISForVertices,*ISForFaces,*ISForEdges;
1865     MatNullSpace nearnullsp;
1866     const Vec    *nearnullvecs;
1867     Vec          *localnearnullsp;
1868     PetscScalar  *array;
1869     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1870     PetscBool    nnsp_has_cnst;
1871     /* LAPACK working arrays for SVD or POD */
1872     PetscBool    skip_lapack,boolforchange;
1873     PetscScalar  *work;
1874     PetscReal    *singular_vals;
1875 #if defined(PETSC_USE_COMPLEX)
1876     PetscReal    *rwork;
1877 #endif
1878 #if defined(PETSC_MISSING_LAPACK_GESVD)
1879     PetscScalar  *temp_basis,*correlation_mat;
1880 #else
1881     PetscBLASInt dummy_int=1;
1882     PetscScalar  dummy_scalar=1.;
1883 #endif
1884 
1885     /* Get index sets for faces, edges and vertices from graph */
1886     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1887     /* free unneeded index sets */
1888     if (!pcbddc->use_vertices) {
1889       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1890     }
1891     if (!pcbddc->use_edges) {
1892       for (i=0;i<n_ISForEdges;i++) {
1893         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1894       }
1895       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1896       n_ISForEdges = 0;
1897     }
1898     if (!pcbddc->use_faces) {
1899       for (i=0;i<n_ISForFaces;i++) {
1900         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1901       }
1902       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1903       n_ISForFaces = 0;
1904     }
1905 
1906 #if defined(PETSC_USE_DEBUG)
1907     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
1908        Also use_change_of_basis should be consistent among processors */
1909     if (pcbddc->NullSpace) {
1910       PetscBool tbool[2],gbool[2];
1911 
1912       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
1913         pcbddc->use_change_of_basis = PETSC_TRUE;
1914         if (!ISForEdges) {
1915           pcbddc->use_change_on_faces = PETSC_TRUE;
1916         }
1917       }
1918       tbool[0] = pcbddc->use_change_of_basis;
1919       tbool[1] = pcbddc->use_change_on_faces;
1920       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1921       pcbddc->use_change_of_basis = gbool[0];
1922       pcbddc->use_change_on_faces = gbool[1];
1923     }
1924 #endif
1925 
1926     /* check if near null space is attached to global mat */
1927     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1928     if (nearnullsp) {
1929       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1930       /* remove any stored info */
1931       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1932       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1933       /* store information for BDDC solver reuse */
1934       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1935       pcbddc->onearnullspace = nearnullsp;
1936       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1937       for (i=0;i<nnsp_size;i++) {
1938         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1939       }
1940     } else { /* if near null space is not provided BDDC uses constants by default */
1941       nnsp_size = 0;
1942       nnsp_has_cnst = PETSC_TRUE;
1943     }
1944     /* get max number of constraints on a single cc */
1945     max_constraints = nnsp_size;
1946     if (nnsp_has_cnst) max_constraints++;
1947 
1948     /*
1949          Evaluate maximum storage size needed by the procedure
1950          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
1951          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
1952          There can be multiple constraints per connected component
1953                                                                                                                                                            */
1954     n_vertices = 0;
1955     if (ISForVertices) {
1956       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1957     }
1958     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
1959     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
1960 
1961     total_counts = n_ISForFaces+n_ISForEdges;
1962     total_counts *= max_constraints;
1963     total_counts += n_vertices;
1964     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1965 
1966     total_counts = 0;
1967     max_size_of_constraint = 0;
1968     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1969       IS used_is;
1970       if (i<n_ISForEdges) {
1971         used_is = ISForEdges[i];
1972       } else {
1973         used_is = ISForFaces[i-n_ISForEdges];
1974       }
1975       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
1976       total_counts += j;
1977       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1978     }
1979     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
1980 
1981     /* get local part of global near null space vectors */
1982     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1983     for (k=0;k<nnsp_size;k++) {
1984       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1985       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1986       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1987     }
1988 
1989     /* whether or not to skip lapack calls */
1990     skip_lapack = PETSC_TRUE;
1991     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1992 
1993     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1994     if (!skip_lapack) {
1995       PetscScalar temp_work;
1996 
1997 #if defined(PETSC_MISSING_LAPACK_GESVD)
1998       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1999       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2000       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2001       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2002 #if defined(PETSC_USE_COMPLEX)
2003       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2004 #endif
2005       /* now we evaluate the optimal workspace using query with lwork=-1 */
2006       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2007       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2008       lwork = -1;
2009       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2010 #if !defined(PETSC_USE_COMPLEX)
2011       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2012 #else
2013       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2014 #endif
2015       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2016       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2017 #else /* on missing GESVD */
2018       /* SVD */
2019       PetscInt max_n,min_n;
2020       max_n = max_size_of_constraint;
2021       min_n = max_constraints;
2022       if (max_size_of_constraint < max_constraints) {
2023         min_n = max_size_of_constraint;
2024         max_n = max_constraints;
2025       }
2026       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2027 #if defined(PETSC_USE_COMPLEX)
2028       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2029 #endif
2030       /* now we evaluate the optimal workspace using query with lwork=-1 */
2031       lwork = -1;
2032       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2033       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2034       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2035       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2036 #if !defined(PETSC_USE_COMPLEX)
2037       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2038 #else
2039       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2040 #endif
2041       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2042       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2043 #endif /* on missing GESVD */
2044       /* Allocate optimal workspace */
2045       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2046       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2047     }
2048     /* Now we can loop on constraining sets */
2049     total_counts = 0;
2050     constraints_idxs_ptr[0] = 0;
2051     constraints_data_ptr[0] = 0;
2052     /* vertices */
2053     if (n_vertices) {
2054       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2055       if (nnsp_has_cnst) { /* it considers all possible vertices */
2056         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2057         for (i=0;i<n_vertices;i++) {
2058           constraints_n[total_counts] = 1;
2059           constraints_data[total_counts] = 1.0;
2060           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2061           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2062           total_counts++;
2063         }
2064       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2065         PetscBool used_vertex;
2066         for (i=0;i<n_vertices;i++) {
2067           used_vertex = PETSC_FALSE;
2068           k = 0;
2069           while (!used_vertex && k<nnsp_size) {
2070             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2071             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2072               constraints_n[total_counts] = 1;
2073               constraints_idxs[total_counts] = is_indices[i];
2074               constraints_data[total_counts] = 1.0;
2075               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2076               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2077               total_counts++;
2078               used_vertex = PETSC_TRUE;
2079             }
2080             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2081             k++;
2082           }
2083         }
2084       }
2085       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2086       n_vertices = total_counts;
2087     }
2088 
2089     /* edges and faces */
2090     total_counts_cc = total_counts;
2091     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2092       IS        used_is;
2093       PetscBool idxs_copied = PETSC_FALSE;
2094 
2095       if (ncc<n_ISForEdges) {
2096         used_is = ISForEdges[ncc];
2097         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2098       } else {
2099         used_is = ISForFaces[ncc-n_ISForEdges];
2100         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2101       }
2102       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2103 
2104       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2105       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2106       /* change of basis should not be performed on local periodic nodes */
2107       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2108       if (nnsp_has_cnst) {
2109         PetscScalar quad_value;
2110 
2111         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2112         idxs_copied = PETSC_TRUE;
2113 
2114         if (!pcbddc->use_nnsp_true) {
2115           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2116         } else {
2117           quad_value = 1.0;
2118         }
2119         for (j=0;j<size_of_constraint;j++) {
2120           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2121         }
2122         temp_constraints++;
2123         total_counts++;
2124       }
2125       for (k=0;k<nnsp_size;k++) {
2126         PetscReal real_value;
2127         PetscScalar *ptr_to_data;
2128 
2129         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2130         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2131         for (j=0;j<size_of_constraint;j++) {
2132           ptr_to_data[j] = array[is_indices[j]];
2133         }
2134         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2135         /* check if array is null on the connected component */
2136         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2137         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2138         if (real_value > 0.0) { /* keep indices and values */
2139           temp_constraints++;
2140           total_counts++;
2141           if (!idxs_copied) {
2142             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2143             idxs_copied = PETSC_TRUE;
2144           }
2145         }
2146       }
2147       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2148       valid_constraints = temp_constraints;
2149       if (!pcbddc->use_nnsp_true && temp_constraints) {
2150         if (temp_constraints == 1) { /* just normalize the constraint */
2151           PetscScalar norm,*ptr_to_data;
2152 
2153           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2154           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2155           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2156           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2157           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2158         } else { /* perform SVD */
2159           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2160           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2161 
2162 #if defined(PETSC_MISSING_LAPACK_GESVD)
2163           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2164              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2165              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2166                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2167                 from that computed using LAPACKgesvd
2168              -> This is due to a different computation of eigenvectors in LAPACKheev
2169              -> The quality of the POD-computed basis will be the same */
2170           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2171           /* Store upper triangular part of correlation matrix */
2172           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2173           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2174           for (j=0;j<temp_constraints;j++) {
2175             for (k=0;k<j+1;k++) {
2176               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2177             }
2178           }
2179           /* compute eigenvalues and eigenvectors of correlation matrix */
2180           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2181           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2182 #if !defined(PETSC_USE_COMPLEX)
2183           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2184 #else
2185           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2186 #endif
2187           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2188           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2189           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2190           j = 0;
2191           while (j < temp_constraints && singular_vals[j] < tol) j++;
2192           total_counts = total_counts-j;
2193           valid_constraints = temp_constraints-j;
2194           /* scale and copy POD basis into used quadrature memory */
2195           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2196           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2197           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2198           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2199           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2200           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2201           if (j<temp_constraints) {
2202             PetscInt ii;
2203             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2204             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2205             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2206             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2207             for (k=0;k<temp_constraints-j;k++) {
2208               for (ii=0;ii<size_of_constraint;ii++) {
2209                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2210               }
2211             }
2212           }
2213 #else  /* on missing GESVD */
2214           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2215           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2216           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2217           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2218 #if !defined(PETSC_USE_COMPLEX)
2219           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2220 #else
2221           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2222 #endif
2223           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2224           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2225           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2226           k = temp_constraints;
2227           if (k > size_of_constraint) k = size_of_constraint;
2228           j = 0;
2229           while (j < k && singular_vals[k-j-1] < tol) j++;
2230           valid_constraints = k-j;
2231           total_counts = total_counts-temp_constraints+valid_constraints;
2232 #endif /* on missing GESVD */
2233         }
2234       }
2235       /* update pointers information */
2236       if (valid_constraints) {
2237         constraints_n[total_counts_cc] = valid_constraints;
2238         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2239         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2240         /* set change_of_basis flag */
2241         if (boolforchange) {
2242           PetscBTSet(change_basis,total_counts_cc);
2243         }
2244         total_counts_cc++;
2245       }
2246     }
2247     /* free workspace */
2248     if (!skip_lapack) {
2249       ierr = PetscFree(work);CHKERRQ(ierr);
2250 #if defined(PETSC_USE_COMPLEX)
2251       ierr = PetscFree(rwork);CHKERRQ(ierr);
2252 #endif
2253       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2254 #if defined(PETSC_MISSING_LAPACK_GESVD)
2255       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2256       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2257 #endif
2258     }
2259     for (k=0;k<nnsp_size;k++) {
2260       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2261     }
2262     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2263     /* free index sets of faces, edges and vertices */
2264     for (i=0;i<n_ISForFaces;i++) {
2265       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2266     }
2267     if (n_ISForFaces) {
2268       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2269     }
2270     for (i=0;i<n_ISForEdges;i++) {
2271       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2272     }
2273     if (n_ISForEdges) {
2274       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2275     }
2276     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2277   } else {
2278     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2279 
2280     total_counts = 0;
2281     n_vertices = 0;
2282     if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
2283       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2284     }
2285     max_constraints = 0;
2286     total_counts_cc = 0;
2287     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2288       total_counts += pcbddc->adaptive_constraints_n[i];
2289       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2290       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2291     }
2292     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2293     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2294     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2295     constraints_data = pcbddc->adaptive_constraints_data;
2296     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2297     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2298     total_counts_cc = 0;
2299     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2300       if (pcbddc->adaptive_constraints_n[i]) {
2301         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2302       }
2303     }
2304 #if 0
2305     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2306     for (i=0;i<total_counts_cc;i++) {
2307       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2308       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2309       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2310         printf(" %d",constraints_idxs[j]);
2311       }
2312       printf("\n");
2313       printf("number of cc: %d\n",constraints_n[i]);
2314     }
2315     for (i=0;i<n_vertices;i++) {
2316       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2317     }
2318     for (i=0;i<sub_schurs->n_subs;i++) {
2319       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2320     }
2321 #endif
2322 
2323     max_size_of_constraint = 0;
2324     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2325     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2326     /* Change of basis */
2327     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2328     if (pcbddc->use_change_of_basis) {
2329       for (i=0;i<sub_schurs->n_subs;i++) {
2330         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2331           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2332         }
2333       }
2334     }
2335   }
2336   pcbddc->local_primal_size = total_counts;
2337   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2338 
2339   /* map constraints_idxs in boundary numbering */
2340   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2341   if (i != constraints_idxs_ptr[total_counts_cc]) {
2342     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2343   }
2344 
2345   /* Create constraint matrix */
2346   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2347   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2348   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2349 
2350   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2351   /* determine if a QR strategy is needed for change of basis */
2352   qr_needed = PETSC_FALSE;
2353   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2354   total_primal_vertices=0;
2355   pcbddc->local_primal_size_cc = 0;
2356   for (i=0;i<total_counts_cc;i++) {
2357     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2358     if (size_of_constraint == 1) {
2359       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2360       pcbddc->local_primal_size_cc += 1;
2361     } else if (PetscBTLookup(change_basis,i)) {
2362       for (k=0;k<constraints_n[i];k++) {
2363         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2364       }
2365       pcbddc->local_primal_size_cc += constraints_n[i];
2366       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2367         PetscBTSet(qr_needed_idx,i);
2368         qr_needed = PETSC_TRUE;
2369       }
2370     } else {
2371       pcbddc->local_primal_size_cc += 1;
2372     }
2373   }
2374   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2375   pcbddc->n_vertices = total_primal_vertices;
2376   /* permute indices in order to have a sorted set of vertices */
2377   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2378 
2379   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2380   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2381   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2382 
2383   /* nonzero structure of constraint matrix */
2384   /* and get reference dof for local constraints */
2385   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2386   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2387 
2388   j = total_primal_vertices;
2389   total_counts = total_primal_vertices;
2390   cum = total_primal_vertices;
2391   for (i=n_vertices;i<total_counts_cc;i++) {
2392     if (!PetscBTLookup(change_basis,i)) {
2393       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2394       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2395       cum++;
2396       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2397       for (k=0;k<constraints_n[i];k++) {
2398         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2399         nnz[j+k] = size_of_constraint;
2400       }
2401       j += constraints_n[i];
2402     }
2403   }
2404   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2405   ierr = PetscFree(nnz);CHKERRQ(ierr);
2406 
2407   /* set values in constraint matrix */
2408   for (i=0;i<total_primal_vertices;i++) {
2409     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2410   }
2411   total_counts = total_primal_vertices;
2412   for (i=n_vertices;i<total_counts_cc;i++) {
2413     if (!PetscBTLookup(change_basis,i)) {
2414       PetscInt *cols;
2415 
2416       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2417       cols = constraints_idxs+constraints_idxs_ptr[i];
2418       for (k=0;k<constraints_n[i];k++) {
2419         PetscInt    row = total_counts+k;
2420         PetscScalar *vals;
2421 
2422         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2423         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2424       }
2425       total_counts += constraints_n[i];
2426     }
2427   }
2428   /* assembling */
2429   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2430   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2431 
2432   /*
2433   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2434   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2435   */
2436   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2437   if (pcbddc->use_change_of_basis) {
2438     /* dual and primal dofs on a single cc */
2439     PetscInt     dual_dofs,primal_dofs;
2440     /* working stuff for GEQRF */
2441     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2442     PetscBLASInt lqr_work;
2443     /* working stuff for UNGQR */
2444     PetscScalar  *gqr_work,lgqr_work_t;
2445     PetscBLASInt lgqr_work;
2446     /* working stuff for TRTRS */
2447     PetscScalar  *trs_rhs;
2448     PetscBLASInt Blas_NRHS;
2449     /* pointers for values insertion into change of basis matrix */
2450     PetscInt     *start_rows,*start_cols;
2451     PetscScalar  *start_vals;
2452     /* working stuff for values insertion */
2453     PetscBT      is_primal;
2454     PetscInt     *aux_primal_numbering_B;
2455     /* matrix sizes */
2456     PetscInt     global_size,local_size;
2457     /* temporary change of basis */
2458     Mat          localChangeOfBasisMatrix;
2459     /* extra space for debugging */
2460     PetscScalar  *dbg_work;
2461 
2462     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2463     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2464     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2465     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2466     /* nonzeros for local mat */
2467     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2468     for (i=0;i<pcis->n;i++) nnz[i]=1;
2469     for (i=n_vertices;i<total_counts_cc;i++) {
2470       if (PetscBTLookup(change_basis,i)) {
2471         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2472         if (PetscBTLookup(qr_needed_idx,i)) {
2473           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2474         } else {
2475           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2476           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2477         }
2478       }
2479     }
2480     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2481     ierr = PetscFree(nnz);CHKERRQ(ierr);
2482     /* Set initial identity in the matrix */
2483     for (i=0;i<pcis->n;i++) {
2484       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2485     }
2486 
2487     if (pcbddc->dbg_flag) {
2488       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2489       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2490     }
2491 
2492 
2493     /* Now we loop on the constraints which need a change of basis */
2494     /*
2495        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2496        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2497 
2498        Basic blocks of change of basis matrix T computed by
2499 
2500           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2501 
2502             | 1        0   ...        0         s_1/S |
2503             | 0        1   ...        0         s_2/S |
2504             |              ...                        |
2505             | 0        ...            1     s_{n-1}/S |
2506             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2507 
2508             with S = \sum_{i=1}^n s_i^2
2509             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2510                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2511 
2512           - QR decomposition of constraints otherwise
2513     */
2514     if (qr_needed) {
2515       /* space to store Q */
2516       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2517       /* first we issue queries for optimal work */
2518       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2519       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2520       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2521       lqr_work = -1;
2522       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2523       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2524       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2525       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2526       lgqr_work = -1;
2527       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2528       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2529       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2530       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2531       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2532       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2533       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2534       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2535       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2536       /* array to store scaling factors for reflectors */
2537       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2538       /* array to store rhs and solution of triangular solver */
2539       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2540       /* allocating workspace for check */
2541       if (pcbddc->dbg_flag) {
2542         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2543       }
2544     }
2545     /* array to store whether a node is primal or not */
2546     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2547     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2548     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2549     if (i != total_primal_vertices) {
2550       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2551     }
2552     for (i=0;i<total_primal_vertices;i++) {
2553       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2554     }
2555     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2556 
2557     /* loop on constraints and see whether or not they need a change of basis and compute it */
2558     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2559       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2560       if (PetscBTLookup(change_basis,total_counts)) {
2561         /* get constraint info */
2562         primal_dofs = constraints_n[total_counts];
2563         dual_dofs = size_of_constraint-primal_dofs;
2564 
2565         if (pcbddc->dbg_flag) {
2566           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2567         }
2568 
2569         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2570 
2571           /* copy quadrature constraints for change of basis check */
2572           if (pcbddc->dbg_flag) {
2573             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2574           }
2575           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2576           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2577 
2578           /* compute QR decomposition of constraints */
2579           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2580           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2581           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2582           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2583           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2584           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2585           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2586 
2587           /* explictly compute R^-T */
2588           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2589           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2590           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2591           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2592           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2593           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2594           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2595           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2596           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2597           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2598 
2599           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2600           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2601           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2602           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2603           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2604           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2605           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2606           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2607           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2608 
2609           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2610              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2611              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2612           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2613           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2614           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2615           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2616           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2617           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2618           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2619           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2620           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2621           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2622 
2623           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2624           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2625           /* insert cols for primal dofs */
2626           for (j=0;j<primal_dofs;j++) {
2627             start_vals = &qr_basis[j*size_of_constraint];
2628             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2629             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2630           }
2631           /* insert cols for dual dofs */
2632           for (j=0,k=0;j<dual_dofs;k++) {
2633             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2634               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2635               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2636               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2637               j++;
2638             }
2639           }
2640 
2641           /* check change of basis */
2642           if (pcbddc->dbg_flag) {
2643             PetscInt   ii,jj;
2644             PetscBool valid_qr=PETSC_TRUE;
2645             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2646             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2647             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2648             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2649             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2650             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2651             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2652             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2653             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2654             for (jj=0;jj<size_of_constraint;jj++) {
2655               for (ii=0;ii<primal_dofs;ii++) {
2656                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2657                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2658               }
2659             }
2660             if (!valid_qr) {
2661               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2662               for (jj=0;jj<size_of_constraint;jj++) {
2663                 for (ii=0;ii<primal_dofs;ii++) {
2664                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2665                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2666                   }
2667                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2668                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2669                   }
2670                 }
2671               }
2672             } else {
2673               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2674             }
2675           }
2676         } else { /* simple transformation block */
2677           PetscInt    row,col;
2678           PetscScalar val,norm;
2679 
2680           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2681           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2682           for (j=0;j<size_of_constraint;j++) {
2683             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2684             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2685             if (!PetscBTLookup(is_primal,row_B)) {
2686               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2687               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2688               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2689             } else {
2690               for (k=0;k<size_of_constraint;k++) {
2691                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2692                 if (row != col) {
2693                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2694                 } else {
2695                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2696                 }
2697                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2698               }
2699             }
2700           }
2701           if (pcbddc->dbg_flag) {
2702             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2703           }
2704         }
2705       } else {
2706         if (pcbddc->dbg_flag) {
2707           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2708         }
2709       }
2710     }
2711 
2712     /* free workspace */
2713     if (qr_needed) {
2714       if (pcbddc->dbg_flag) {
2715         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2716       }
2717       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2718       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2719       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2720       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2721       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2722     }
2723     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2724     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2725     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2726 
2727     /* assembling of global change of variable */
2728     {
2729       Mat      tmat;
2730       PetscInt bs;
2731 
2732       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2733       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2734       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2735       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2736       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2737       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2738       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2739       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2740       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2741       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2742       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2743       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2744       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2745       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2746       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2747       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2748       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2749       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2750     }
2751     /* check */
2752     if (pcbddc->dbg_flag) {
2753       PetscReal error;
2754       Vec       x,x_change;
2755 
2756       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2757       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2758       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2759       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2760       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2761       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2762       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2763       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2764       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2765       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2766       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2767       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2768       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2769       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2770       ierr = VecDestroy(&x);CHKERRQ(ierr);
2771       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2772     }
2773 
2774     /* adapt sub_schurs computed (if any) */
2775     if (pcbddc->use_deluxe_scaling) {
2776       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2777       if (sub_schurs->S_Ej_all) {
2778         Mat S_new,tmat;
2779         IS is_all_N;
2780 
2781         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2782         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2783         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2784         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2785         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2786         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2787         sub_schurs->S_Ej_all = S_new;
2788         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2789         if (sub_schurs->sum_S_Ej_all) {
2790           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2791           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2792           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2793           sub_schurs->sum_S_Ej_all = S_new;
2794           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2795         }
2796         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2797       }
2798     }
2799     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2800   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2801     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2802     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2803   }
2804 
2805   /* set up change of basis context */
2806   if (pcbddc->ChangeOfBasisMatrix) {
2807     PCBDDCChange_ctx change_ctx;
2808 
2809     if (!pcbddc->new_global_mat) {
2810       PetscInt global_size,local_size;
2811 
2812       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2813       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2814       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2815       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2816       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2817       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2818       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2819       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2820       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2821     } else {
2822       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2823       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2824       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2825     }
2826     if (!pcbddc->user_ChangeOfBasisMatrix) {
2827       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2828       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2829     } else {
2830       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2831       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2832     }
2833     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2834     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2835     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2836     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2837   }
2838 
2839   /* check if a new primal space has been introduced */
2840   pcbddc->new_primal_space_local = PETSC_TRUE;
2841   if (olocal_primal_size == pcbddc->local_primal_size) {
2842     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2843     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2844     if (!pcbddc->new_primal_space_local) {
2845       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2846       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2847     }
2848   }
2849   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
2850   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2851   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2852 
2853   /* flush dbg viewer */
2854   if (pcbddc->dbg_flag) {
2855     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2856   }
2857 
2858   /* free workspace */
2859   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2860   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2861   if (!pcbddc->adaptive_selection) {
2862     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2863     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2864   } else {
2865     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2866                       pcbddc->adaptive_constraints_idxs_ptr,
2867                       pcbddc->adaptive_constraints_data_ptr,
2868                       pcbddc->adaptive_constraints_idxs,
2869                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2870     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
2871     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
2872   }
2873   PetscFunctionReturn(0);
2874 }
2875 
2876 #undef __FUNCT__
2877 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2878 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2879 {
2880   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2881   PC_IS       *pcis = (PC_IS*)pc->data;
2882   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2883   PetscInt    ierr,i,vertex_size,N;
2884   PetscViewer viewer=pcbddc->dbg_viewer;
2885 
2886   PetscFunctionBegin;
2887   /* Reset previously computed graph */
2888   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2889   /* Init local Graph struct */
2890   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2891   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
2892 
2893   /* Check validity of the csr graph passed in by the user */
2894   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2895     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2896   }
2897 
2898   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2899   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2900     PetscInt  *xadj,*adjncy;
2901     PetscInt  nvtxs;
2902     PetscBool flg_row=PETSC_FALSE;
2903 
2904     if (pcbddc->use_local_adj) {
2905 
2906       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2907       if (flg_row) {
2908         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2909         pcbddc->computed_rowadj = PETSC_TRUE;
2910       }
2911       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2912     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
2913       IS                     is_dummy;
2914       ISLocalToGlobalMapping l2gmap_dummy;
2915       PetscInt               j,sum;
2916       PetscInt               *cxadj,*cadjncy;
2917       const PetscInt         *idxs;
2918       PCBDDCGraph            graph;
2919       PetscBT                is_on_boundary;
2920 
2921       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2922       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2923       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2924       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2925       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
2926       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2927       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2928       if (flg_row) {
2929         graph->xadj = xadj;
2930         graph->adjncy = adjncy;
2931       }
2932       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2933       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2934       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2935 
2936       if (pcbddc->dbg_flag) {
2937         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
2938         for (i=0;i<graph->ncc;i++) {
2939           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2940         }
2941       }
2942 
2943       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2944       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2945       for (i=0;i<pcis->n_B;i++) {
2946         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2947       }
2948       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2949 
2950       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2951       sum = 0;
2952       for (i=0;i<graph->ncc;i++) {
2953         PetscInt sizecc = 0;
2954         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2955           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2956             sizecc++;
2957           }
2958         }
2959         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2960           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2961             cxadj[graph->queue[j]] = sizecc;
2962           }
2963         }
2964         sum += sizecc*sizecc;
2965       }
2966       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2967       sum = 0;
2968       for (i=0;i<pcis->n;i++) {
2969         PetscInt temp = cxadj[i];
2970         cxadj[i] = sum;
2971         sum += temp;
2972       }
2973       cxadj[pcis->n] = sum;
2974       for (i=0;i<graph->ncc;i++) {
2975         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2976           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2977             PetscInt k,sizecc = 0;
2978             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2979               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2980                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2981                 sizecc++;
2982               }
2983             }
2984           }
2985         }
2986       }
2987       if (sum) {
2988         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2989       } else {
2990         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2991         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2992       }
2993       graph->xadj = 0;
2994       graph->adjncy = 0;
2995       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2996       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2997     }
2998   }
2999   if (pcbddc->dbg_flag) {
3000     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3001   }
3002 
3003   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3004   vertex_size = 1;
3005   if (pcbddc->user_provided_isfordofs) {
3006     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3007       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3008       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3009         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3010         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3011       }
3012       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3013       pcbddc->n_ISForDofs = 0;
3014       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3015     }
3016     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3017     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3018   } else {
3019     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3020       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3021       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3022       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3023         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3024       }
3025     }
3026   }
3027 
3028   /* Setup of Graph */
3029   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3030     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3031   }
3032   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3033     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3034   }
3035   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3036 
3037   /* Graph's connected components analysis */
3038   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3039 
3040   /* print some info to stdout */
3041   if (pcbddc->dbg_flag) {
3042     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3043   }
3044 
3045   /* mark topography has done */
3046   pcbddc->recompute_topography = PETSC_FALSE;
3047   PetscFunctionReturn(0);
3048 }
3049 
3050 #undef __FUNCT__
3051 #define __FUNCT__ "PCBDDCSubsetNumbering"
3052 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3053 {
3054   Vec            local_vec,global_vec;
3055   IS             seqis,paris;
3056   VecScatter     scatter_ctx;
3057   PetscScalar    *array;
3058   PetscInt       *temp_global_dofs;
3059   PetscScalar    globalsum;
3060   PetscInt       i,j,s;
3061   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3062   PetscMPIInt    rank_prec_comm,size_prec_comm;
3063   PetscInt       *dof_sizes,*dof_displs;
3064   PetscBool      first_found;
3065   PetscErrorCode ierr;
3066 
3067   PetscFunctionBegin;
3068   /* mpi buffers */
3069   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3070   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3071   j = ( !rank_prec_comm ? size_prec_comm : 0);
3072   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3073   /* get maximum size of subset */
3074   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3075   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3076   max_local = 0;
3077   for (i=0;i<n_local_dofs;i++) {
3078     if (max_local < temp_global_dofs[i] ) {
3079       max_local = temp_global_dofs[i];
3080     }
3081   }
3082   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3083   max_global++;
3084   max_local = 0;
3085   for (i=0;i<n_local_dofs;i++) {
3086     if (max_local < local_dofs[i] ) {
3087       max_local = local_dofs[i];
3088     }
3089   }
3090   max_local++;
3091   /* allocate workspace */
3092   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3093   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3094   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3095   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3096   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3097   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3098   /* create scatter */
3099   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3100   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3101   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3102   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3103   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3104   /* init array */
3105   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3106   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3107   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3108   if (local_dofs_mult) {
3109     for (i=0;i<n_local_dofs;i++) {
3110       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3111     }
3112   } else {
3113     for (i=0;i<n_local_dofs;i++) {
3114       array[local_dofs[i]]=1.0;
3115     }
3116   }
3117   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3118   /* scatter into global vec and get total number of global dofs */
3119   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3120   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3121   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3122   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3123   /* Fill global_vec with cumulative function for global numbering */
3124   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3125   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3126   nlocals = 0;
3127   first_index = -1;
3128   first_found = PETSC_FALSE;
3129   for (i=0;i<s;i++) {
3130     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3131       first_found = PETSC_TRUE;
3132       first_index = i;
3133     }
3134     nlocals += (PetscInt)PetscRealPart(array[i]);
3135   }
3136   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3137   if (!rank_prec_comm) {
3138     dof_displs[0]=0;
3139     for (i=1;i<size_prec_comm;i++) {
3140       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3141     }
3142   }
3143   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3144   if (first_found) {
3145     array[first_index] += (PetscScalar)nlocals;
3146     old_index = first_index;
3147     for (i=first_index+1;i<s;i++) {
3148       if (PetscRealPart(array[i]) > 0.1) {
3149         array[i] += array[old_index];
3150         old_index = i;
3151       }
3152     }
3153   }
3154   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3155   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3156   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3157   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3158   /* get global ordering of local dofs */
3159   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3160   if (local_dofs_mult) {
3161     for (i=0;i<n_local_dofs;i++) {
3162       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3163     }
3164   } else {
3165     for (i=0;i<n_local_dofs;i++) {
3166       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3167     }
3168   }
3169   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3170   /* free workspace */
3171   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3172   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3173   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3174   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3175   /* return pointer to global ordering of local dofs */
3176   *global_numbering_subset = temp_global_dofs;
3177   PetscFunctionReturn(0);
3178 }
3179 
3180 #undef __FUNCT__
3181 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3182 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3183 {
3184   PetscInt       i,j;
3185   PetscScalar    *alphas;
3186   PetscErrorCode ierr;
3187 
3188   PetscFunctionBegin;
3189   /* this implements stabilized Gram-Schmidt */
3190   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3191   for (i=0;i<n;i++) {
3192     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3193     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3194     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3195   }
3196   ierr = PetscFree(alphas);CHKERRQ(ierr);
3197   PetscFunctionReturn(0);
3198 }
3199 
3200 #undef __FUNCT__
3201 #define __FUNCT__ "MatISGetSubassemblingPattern"
3202 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3203 {
3204   Mat             subdomain_adj;
3205   IS              new_ranks,ranks_send_to;
3206   MatPartitioning partitioner;
3207   Mat_IS          *matis;
3208   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3209   PetscInt        prank;
3210   PetscMPIInt     size,rank,color;
3211   PetscInt        *xadj,*adjncy,*oldranks;
3212   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3213   PetscInt        i,local_size,threshold=0;
3214   PetscErrorCode  ierr;
3215   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3216   PetscSubcomm    subcomm;
3217 
3218   PetscFunctionBegin;
3219   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3220   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3221   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3222 
3223   /* Get info on mapping */
3224   matis = (Mat_IS*)(mat->data);
3225   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3226   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3227 
3228   /* build local CSR graph of subdomains' connectivity */
3229   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3230   xadj[0] = 0;
3231   xadj[1] = PetscMax(n_neighs-1,0);
3232   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3233   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3234 
3235   if (threshold) {
3236     PetscInt xadj_count = 0;
3237     for (i=1;i<n_neighs;i++) {
3238       if (n_shared[i] > threshold) {
3239         adjncy[xadj_count] = neighs[i];
3240         adjncy_wgt[xadj_count] = n_shared[i];
3241         xadj_count++;
3242       }
3243     }
3244     xadj[1] = xadj_count;
3245   } else {
3246     if (xadj[1]) {
3247       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3248       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3249     }
3250   }
3251   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3252   if (use_square) {
3253     for (i=0;i<xadj[1];i++) {
3254       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3255     }
3256   }
3257   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3258 
3259   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3260 
3261   /*
3262     Restrict work on active processes only.
3263   */
3264   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3265   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3266   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3267   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3268   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3269   if (color) {
3270     ierr = PetscFree(xadj);CHKERRQ(ierr);
3271     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3272     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3273   } else {
3274     PetscInt coarsening_ratio;
3275     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3276     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3277     prank = rank;
3278     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3279     /*
3280     for (i=0;i<size;i++) {
3281       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3282     }
3283     */
3284     for (i=0;i<xadj[1];i++) {
3285       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3286     }
3287     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3288     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3289     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3290 
3291     /* Partition */
3292     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3293     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3294     if (use_vwgt) {
3295       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3296       v_wgt[0] = local_size;
3297       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3298     }
3299     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3300     coarsening_ratio = size/n_subdomains;
3301     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3302     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3303     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3304     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3305 
3306     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3307     if (contiguous) {
3308       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3309     } else {
3310       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3311     }
3312     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3313     /* clean up */
3314     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3315     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3316     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3317     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3318   }
3319   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3320 
3321   /* assemble parallel IS for sends */
3322   i = 1;
3323   if (color) i=0;
3324   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3325 
3326   /* get back IS */
3327   *is_sends = ranks_send_to;
3328   PetscFunctionReturn(0);
3329 }
3330 
3331 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3332 
3333 #undef __FUNCT__
3334 #define __FUNCT__ "MatISSubassemble"
3335 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3336 {
3337   Mat                    local_mat;
3338   Mat_IS                 *matis;
3339   IS                     is_sends_internal;
3340   PetscInt               rows,cols,new_local_rows;
3341   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3342   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3343   ISLocalToGlobalMapping l2gmap;
3344   PetscInt*              l2gmap_indices;
3345   const PetscInt*        is_indices;
3346   MatType                new_local_type;
3347   /* buffers */
3348   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3349   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3350   PetscInt               *recv_buffer_idxs_local;
3351   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3352   /* MPI */
3353   MPI_Comm               comm,comm_n;
3354   PetscSubcomm           subcomm;
3355   PetscMPIInt            n_sends,n_recvs,commsize;
3356   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3357   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3358   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3359   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3360   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3361   PetscErrorCode         ierr;
3362 
3363   PetscFunctionBegin;
3364   /* TODO: add missing checks */
3365   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3366   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3367   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3368   PetscValidLogicalCollectiveInt(mat,nis,7);
3369   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3370   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3371   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3372   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3373   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3374   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3375   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3376   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3377     PetscInt mrows,mcols,mnrows,mncols;
3378     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3379     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3380     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3381     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3382     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3383     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3384   }
3385   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3386   PetscValidLogicalCollectiveInt(mat,bs,0);
3387   /* prepare IS for sending if not provided */
3388   if (!is_sends) {
3389     PetscBool pcontig = PETSC_TRUE;
3390     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3391     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3392   } else {
3393     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3394     is_sends_internal = is_sends;
3395   }
3396 
3397   /* get pointer of MATIS data */
3398   matis = (Mat_IS*)mat->data;
3399 
3400   /* get comm */
3401   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3402 
3403   /* compute number of sends */
3404   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3405   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3406 
3407   /* compute number of receives */
3408   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3409   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3410   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3411   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3412   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3413   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3414   ierr = PetscFree(iflags);CHKERRQ(ierr);
3415 
3416   /* restrict comm if requested */
3417   subcomm = 0;
3418   destroy_mat = PETSC_FALSE;
3419   if (restrict_comm) {
3420     PetscMPIInt color,subcommsize;
3421 
3422     color = 0;
3423     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3424     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3425     subcommsize = commsize - subcommsize;
3426     /* check if reuse has been requested */
3427     if (reuse == MAT_REUSE_MATRIX) {
3428       if (*mat_n) {
3429         PetscMPIInt subcommsize2;
3430         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3431         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3432         comm_n = PetscObjectComm((PetscObject)*mat_n);
3433       } else {
3434         comm_n = PETSC_COMM_SELF;
3435       }
3436     } else { /* MAT_INITIAL_MATRIX */
3437       PetscMPIInt rank;
3438 
3439       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3440       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3441       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3442       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3443       comm_n = PetscSubcommChild(subcomm);
3444     }
3445     /* flag to destroy *mat_n if not significative */
3446     if (color) destroy_mat = PETSC_TRUE;
3447   } else {
3448     comm_n = comm;
3449   }
3450 
3451   /* prepare send/receive buffers */
3452   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3453   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3454   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3455   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3456   if (nis) {
3457     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3458   }
3459 
3460   /* Get data from local matrices */
3461   if (!isdense) {
3462     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3463     /* TODO: See below some guidelines on how to prepare the local buffers */
3464     /*
3465        send_buffer_vals should contain the raw values of the local matrix
3466        send_buffer_idxs should contain:
3467        - MatType_PRIVATE type
3468        - PetscInt        size_of_l2gmap
3469        - PetscInt        global_row_indices[size_of_l2gmap]
3470        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3471     */
3472   } else {
3473     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3474     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3475     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3476     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3477     send_buffer_idxs[1] = i;
3478     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3479     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3480     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3481     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3482     for (i=0;i<n_sends;i++) {
3483       ilengths_vals[is_indices[i]] = len*len;
3484       ilengths_idxs[is_indices[i]] = len+2;
3485     }
3486   }
3487   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3488   /* additional is (if any) */
3489   if (nis) {
3490     PetscMPIInt psum;
3491     PetscInt j;
3492     for (j=0,psum=0;j<nis;j++) {
3493       PetscInt plen;
3494       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3495       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3496       psum += len+1; /* indices + lenght */
3497     }
3498     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3499     for (j=0,psum=0;j<nis;j++) {
3500       PetscInt plen;
3501       const PetscInt *is_array_idxs;
3502       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3503       send_buffer_idxs_is[psum] = plen;
3504       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3505       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3506       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3507       psum += plen+1; /* indices + lenght */
3508     }
3509     for (i=0;i<n_sends;i++) {
3510       ilengths_idxs_is[is_indices[i]] = psum;
3511     }
3512     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3513   }
3514 
3515   buf_size_idxs = 0;
3516   buf_size_vals = 0;
3517   buf_size_idxs_is = 0;
3518   for (i=0;i<n_recvs;i++) {
3519     buf_size_idxs += (PetscInt)olengths_idxs[i];
3520     buf_size_vals += (PetscInt)olengths_vals[i];
3521     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3522   }
3523   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3524   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3525   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3526 
3527   /* get new tags for clean communications */
3528   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3529   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3530   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3531 
3532   /* allocate for requests */
3533   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3534   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3535   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3536   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3537   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3538   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3539 
3540   /* communications */
3541   ptr_idxs = recv_buffer_idxs;
3542   ptr_vals = recv_buffer_vals;
3543   ptr_idxs_is = recv_buffer_idxs_is;
3544   for (i=0;i<n_recvs;i++) {
3545     source_dest = onodes[i];
3546     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3547     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3548     ptr_idxs += olengths_idxs[i];
3549     ptr_vals += olengths_vals[i];
3550     if (nis) {
3551       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3552       ptr_idxs_is += olengths_idxs_is[i];
3553     }
3554   }
3555   for (i=0;i<n_sends;i++) {
3556     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3557     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3558     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3559     if (nis) {
3560       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3561     }
3562   }
3563   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3564   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3565 
3566   /* assemble new l2g map */
3567   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3568   ptr_idxs = recv_buffer_idxs;
3569   new_local_rows = 0;
3570   for (i=0;i<n_recvs;i++) {
3571     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3572     ptr_idxs += olengths_idxs[i];
3573   }
3574   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3575   ptr_idxs = recv_buffer_idxs;
3576   new_local_rows = 0;
3577   for (i=0;i<n_recvs;i++) {
3578     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3579     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3580     ptr_idxs += olengths_idxs[i];
3581   }
3582   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3583   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3584   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3585 
3586   /* infer new local matrix type from received local matrices type */
3587   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3588   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3589   if (n_recvs) {
3590     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3591     ptr_idxs = recv_buffer_idxs;
3592     for (i=0;i<n_recvs;i++) {
3593       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3594         new_local_type_private = MATAIJ_PRIVATE;
3595         break;
3596       }
3597       ptr_idxs += olengths_idxs[i];
3598     }
3599     switch (new_local_type_private) {
3600       case MATDENSE_PRIVATE:
3601         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3602           new_local_type = MATSEQAIJ;
3603           bs = 1;
3604         } else { /* if I receive only 1 dense matrix */
3605           new_local_type = MATSEQDENSE;
3606           bs = 1;
3607         }
3608         break;
3609       case MATAIJ_PRIVATE:
3610         new_local_type = MATSEQAIJ;
3611         bs = 1;
3612         break;
3613       case MATBAIJ_PRIVATE:
3614         new_local_type = MATSEQBAIJ;
3615         break;
3616       case MATSBAIJ_PRIVATE:
3617         new_local_type = MATSEQSBAIJ;
3618         break;
3619       default:
3620         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3621         break;
3622     }
3623   } else { /* by default, new_local_type is seqdense */
3624     new_local_type = MATSEQDENSE;
3625     bs = 1;
3626   }
3627 
3628   /* create MATIS object if needed */
3629   if (reuse == MAT_INITIAL_MATRIX) {
3630     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3631     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3632   } else {
3633     /* it also destroys the local matrices */
3634     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3635   }
3636   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3637   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3638 
3639   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3640 
3641   /* Global to local map of received indices */
3642   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3643   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3644   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3645 
3646   /* restore attributes -> type of incoming data and its size */
3647   buf_size_idxs = 0;
3648   for (i=0;i<n_recvs;i++) {
3649     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3650     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3651     buf_size_idxs += (PetscInt)olengths_idxs[i];
3652   }
3653   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3654 
3655   /* set preallocation */
3656   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3657   if (!newisdense) {
3658     PetscInt *new_local_nnz=0;
3659 
3660     ptr_vals = recv_buffer_vals;
3661     ptr_idxs = recv_buffer_idxs_local;
3662     if (n_recvs) {
3663       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3664     }
3665     for (i=0;i<n_recvs;i++) {
3666       PetscInt j;
3667       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3668         for (j=0;j<*(ptr_idxs+1);j++) {
3669           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3670         }
3671       } else {
3672         /* TODO */
3673       }
3674       ptr_idxs += olengths_idxs[i];
3675     }
3676     if (new_local_nnz) {
3677       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3678       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3679       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3680       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3681       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3682       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3683     } else {
3684       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3685     }
3686     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3687   } else {
3688     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3689   }
3690 
3691   /* set values */
3692   ptr_vals = recv_buffer_vals;
3693   ptr_idxs = recv_buffer_idxs_local;
3694   for (i=0;i<n_recvs;i++) {
3695     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3696       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3697       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3698       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3699       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3700       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3701     } else {
3702       /* TODO */
3703     }
3704     ptr_idxs += olengths_idxs[i];
3705     ptr_vals += olengths_vals[i];
3706   }
3707   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3708   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3709   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3710   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3711   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3712   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3713 
3714 #if 0
3715   if (!restrict_comm) { /* check */
3716     Vec       lvec,rvec;
3717     PetscReal infty_error;
3718 
3719     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3720     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3721     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3722     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3723     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3724     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3725     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3726     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3727     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3728   }
3729 #endif
3730 
3731   /* assemble new additional is (if any) */
3732   if (nis) {
3733     PetscInt **temp_idxs,*count_is,j,psum;
3734 
3735     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3736     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3737     ptr_idxs = recv_buffer_idxs_is;
3738     psum = 0;
3739     for (i=0;i<n_recvs;i++) {
3740       for (j=0;j<nis;j++) {
3741         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3742         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3743         psum += plen;
3744         ptr_idxs += plen+1; /* shift pointer to received data */
3745       }
3746     }
3747     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3748     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3749     for (i=1;i<nis;i++) {
3750       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3751     }
3752     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3753     ptr_idxs = recv_buffer_idxs_is;
3754     for (i=0;i<n_recvs;i++) {
3755       for (j=0;j<nis;j++) {
3756         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3757         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3758         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3759         ptr_idxs += plen+1; /* shift pointer to received data */
3760       }
3761     }
3762     for (i=0;i<nis;i++) {
3763       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3764       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3765       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3766     }
3767     ierr = PetscFree(count_is);CHKERRQ(ierr);
3768     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3769     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3770   }
3771   /* free workspace */
3772   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3773   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3774   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3775   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3776   if (isdense) {
3777     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3778     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3779   } else {
3780     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3781   }
3782   if (nis) {
3783     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3784     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3785   }
3786   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3787   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3788   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3789   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3790   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3791   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3792   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3793   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3794   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3795   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3796   ierr = PetscFree(onodes);CHKERRQ(ierr);
3797   if (nis) {
3798     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3799     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3800     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3801   }
3802   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3803   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3804     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3805     for (i=0;i<nis;i++) {
3806       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3807     }
3808   }
3809   PetscFunctionReturn(0);
3810 }
3811 
3812 /* temporary hack into ksp private data structure */
3813 #include <petsc/private/kspimpl.h>
3814 
3815 #undef __FUNCT__
3816 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3817 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3818 {
3819   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3820   PC_IS                  *pcis = (PC_IS*)pc->data;
3821   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3822   MatNullSpace           CoarseNullSpace=NULL;
3823   ISLocalToGlobalMapping coarse_islg;
3824   IS                     coarse_is,*isarray;
3825   PetscInt               i,im_active=-1,active_procs=-1;
3826   PetscInt               nis,nisdofs,nisneu;
3827   PC                     pc_temp;
3828   PCType                 coarse_pc_type;
3829   KSPType                coarse_ksp_type;
3830   PetscBool              multilevel_requested,multilevel_allowed;
3831   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3832   Mat                    t_coarse_mat_is;
3833   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3834   PetscMPIInt            all_procs;
3835   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3836   PetscBool              compute_vecs = PETSC_FALSE;
3837   PetscScalar            *array;
3838   PetscErrorCode         ierr;
3839 
3840   PetscFunctionBegin;
3841   /* Assign global numbering to coarse dofs */
3842   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3843     PetscInt ocoarse_size;
3844     compute_vecs = PETSC_TRUE;
3845     ocoarse_size = pcbddc->coarse_size;
3846     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3847     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3848     /* see if we can avoid some work */
3849     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3850       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3851         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3852         coarse_reuse = PETSC_FALSE;
3853       } else { /* we can safely reuse already computed coarse matrix */
3854         coarse_reuse = PETSC_TRUE;
3855       }
3856     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3857       coarse_reuse = PETSC_FALSE;
3858     }
3859     /* reset any subassembling information */
3860     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3861     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3862   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3863     coarse_reuse = PETSC_TRUE;
3864   }
3865 
3866   /* count "active" (i.e. with positive local size) and "void" processes */
3867   im_active = !!(pcis->n);
3868   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3869   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3870   void_procs = all_procs-active_procs;
3871   csin_type_simple = PETSC_TRUE;
3872   redist = PETSC_FALSE;
3873   if (pcbddc->current_level && void_procs) {
3874     csin_ml = PETSC_TRUE;
3875     ncoarse_ml = void_procs;
3876     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3877     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3878       csin_ds = PETSC_TRUE;
3879       ncoarse_ds = pcbddc->redistribute_coarse;
3880       redist = PETSC_TRUE;
3881     } else {
3882       csin_ds = PETSC_TRUE;
3883       ncoarse_ds = active_procs;
3884       redist = PETSC_TRUE;
3885     }
3886   } else {
3887     csin_ml = PETSC_FALSE;
3888     ncoarse_ml = all_procs;
3889     if (void_procs) {
3890       csin_ds = PETSC_TRUE;
3891       ncoarse_ds = void_procs;
3892       csin_type_simple = PETSC_FALSE;
3893     } else {
3894       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3895         csin_ds = PETSC_TRUE;
3896         ncoarse_ds = pcbddc->redistribute_coarse;
3897         redist = PETSC_TRUE;
3898       } else {
3899         csin_ds = PETSC_FALSE;
3900         ncoarse_ds = all_procs;
3901       }
3902     }
3903   }
3904 
3905   /*
3906     test if we can go multilevel: three conditions must be satisfied:
3907     - we have not exceeded the number of levels requested
3908     - we can actually subassemble the active processes
3909     - we can find a suitable number of MPI processes where we can place the subassembled problem
3910   */
3911   multilevel_allowed = PETSC_FALSE;
3912   multilevel_requested = PETSC_FALSE;
3913   if (pcbddc->current_level < pcbddc->max_levels) {
3914     multilevel_requested = PETSC_TRUE;
3915     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3916       multilevel_allowed = PETSC_FALSE;
3917     } else {
3918       multilevel_allowed = PETSC_TRUE;
3919     }
3920   }
3921   /* determine number of process partecipating to coarse solver */
3922   if (multilevel_allowed) {
3923     ncoarse = ncoarse_ml;
3924     csin = csin_ml;
3925     redist = PETSC_FALSE;
3926   } else {
3927     ncoarse = ncoarse_ds;
3928     csin = csin_ds;
3929   }
3930 
3931   /* creates temporary l2gmap and IS for coarse indexes */
3932   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3933   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3934 
3935   /* creates temporary MATIS object for coarse matrix */
3936   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3937   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3938   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3939   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3940 #if 0
3941   {
3942     PetscViewer viewer;
3943     char filename[256];
3944     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3945     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3946     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3947     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3948     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3949   }
3950 #endif
3951   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3952   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3953   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3954   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3955   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3956 
3957   /* compute dofs splitting and neumann boundaries for coarse dofs */
3958   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3959     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3960     const PetscInt         *idxs;
3961     ISLocalToGlobalMapping tmap;
3962 
3963     /* create map between primal indices (in local representative ordering) and local primal numbering */
3964     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3965     /* allocate space for temporary storage */
3966     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3967     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3968     /* allocate for IS array */
3969     nisdofs = pcbddc->n_ISForDofsLocal;
3970     nisneu = !!pcbddc->NeumannBoundariesLocal;
3971     nis = nisdofs + nisneu;
3972     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3973     /* dofs splitting */
3974     for (i=0;i<nisdofs;i++) {
3975       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3976       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3977       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3978       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3979       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3980       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3981       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3982       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3983     }
3984     /* neumann boundaries */
3985     if (pcbddc->NeumannBoundariesLocal) {
3986       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3987       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3988       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3989       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3990       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3991       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3992       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3993       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3994     }
3995     /* free memory */
3996     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3997     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3998     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3999   } else {
4000     nis = 0;
4001     nisdofs = 0;
4002     nisneu = 0;
4003     isarray = NULL;
4004   }
4005   /* destroy no longer needed map */
4006   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4007 
4008   /* restrict on coarse candidates (if needed) */
4009   coarse_mat_is = NULL;
4010   if (csin) {
4011     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4012       if (redist) {
4013         PetscMPIInt rank;
4014         PetscInt    spc,n_spc_p1,dest[1],destsize;
4015 
4016         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4017         spc = active_procs/ncoarse;
4018         n_spc_p1 = active_procs%ncoarse;
4019         if (im_active) {
4020           destsize = 1;
4021           if (rank > n_spc_p1*(spc+1)-1) {
4022             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4023           } else {
4024             dest[0] = rank/(spc+1);
4025           }
4026         } else {
4027           destsize = 0;
4028         }
4029         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4030       } else if (csin_type_simple) {
4031         PetscMPIInt rank;
4032         PetscInt    issize,isidx;
4033 
4034         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4035         if (im_active) {
4036           issize = 1;
4037           isidx = (PetscInt)rank;
4038         } else {
4039           issize = 0;
4040           isidx = -1;
4041         }
4042         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4043       } else { /* get a suitable subassembling pattern from MATIS code */
4044         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4045       }
4046 
4047       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4048       if (!redist || ncoarse <= void_procs) {
4049         PetscInt ncoarse_cand,tissize,*nisindices;
4050         PetscInt *coarse_candidates;
4051         const PetscInt* tisindices;
4052 
4053         /* get coarse candidates' ranks in pc communicator */
4054         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4055         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4056         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4057           if (!coarse_candidates[i]) {
4058             coarse_candidates[ncoarse_cand++]=i;
4059           }
4060         }
4061         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4062 
4063 
4064         if (pcbddc->dbg_flag) {
4065           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4066           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4067           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4068           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4069           for (i=0;i<ncoarse_cand;i++) {
4070             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4071           }
4072           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4073           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4074         }
4075         /* shift the pattern on coarse candidates */
4076         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4077         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4078         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4079         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4080         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4081         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4082         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4083       }
4084       if (pcbddc->dbg_flag) {
4085         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4086         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4087         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4088         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4089       }
4090     }
4091     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4092     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4093   } else {
4094     if (pcbddc->dbg_flag) {
4095       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4096       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4097       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4098     }
4099     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4100     coarse_mat_is = t_coarse_mat_is;
4101   }
4102 
4103   /* create local to global scatters for coarse problem */
4104   if (compute_vecs) {
4105     PetscInt lrows;
4106     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4107     if (coarse_mat_is) {
4108       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4109     } else {
4110       lrows = 0;
4111     }
4112     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4113     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4114     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4115     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4116     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4117   }
4118   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4119   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4120 
4121   /* set defaults for coarse KSP and PC */
4122   if (multilevel_allowed) {
4123     coarse_ksp_type = KSPRICHARDSON;
4124     coarse_pc_type = PCBDDC;
4125   } else {
4126     coarse_ksp_type = KSPPREONLY;
4127     coarse_pc_type = PCREDUNDANT;
4128   }
4129 
4130   /* print some info if requested */
4131   if (pcbddc->dbg_flag) {
4132     if (!multilevel_allowed) {
4133       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4134       if (multilevel_requested) {
4135         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4136       } else if (pcbddc->max_levels) {
4137         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4138       }
4139       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4140     }
4141   }
4142 
4143   /* create the coarse KSP object only once with defaults */
4144   if (coarse_mat_is) {
4145     MatReuse coarse_mat_reuse;
4146     PetscViewer dbg_viewer = NULL;
4147     if (pcbddc->dbg_flag) {
4148       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4149       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4150     }
4151     if (!pcbddc->coarse_ksp) {
4152       char prefix[256],str_level[16];
4153       size_t len;
4154       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4155       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4156       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4157       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4158       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4159       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4160       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4161       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4162       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4163       /* prefix */
4164       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4165       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4166       if (!pcbddc->current_level) {
4167         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4168         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4169       } else {
4170         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4171         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4172         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4173         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4174         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4175         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4176       }
4177       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4178       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4179       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4180       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4181       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4182       /* allow user customization */
4183       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4184     }
4185     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4186     if (nisdofs) {
4187       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4188       for (i=0;i<nisdofs;i++) {
4189         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4190       }
4191     }
4192     if (nisneu) {
4193       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4194       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4195     }
4196 
4197     /* get some info after set from options */
4198     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4199     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4200     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4201     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4202     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4203       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4204       isbddc = PETSC_FALSE;
4205     }
4206     if (isredundant) {
4207       KSP inner_ksp;
4208       PC inner_pc;
4209       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4210       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4211       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4212     }
4213 
4214     /* assemble coarse matrix */
4215     if (coarse_reuse) {
4216       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4217       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4218       coarse_mat_reuse = MAT_REUSE_MATRIX;
4219     } else {
4220       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4221     }
4222     if (isbddc || isnn) {
4223       if (pcbddc->coarsening_ratio > 1) {
4224         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4225           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4226           if (pcbddc->dbg_flag) {
4227             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4228             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4229             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4230             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4231           }
4232         }
4233         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4234       } else {
4235         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4236         coarse_mat = coarse_mat_is;
4237       }
4238     } else {
4239       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4240     }
4241     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4242 
4243     /* propagate symmetry info to coarse matrix */
4244     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4245     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4246 
4247     /* set operators */
4248     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4249     if (pcbddc->dbg_flag) {
4250       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4251     }
4252   } else { /* processes non partecipating to coarse solver (if any) */
4253     coarse_mat = 0;
4254   }
4255   ierr = PetscFree(isarray);CHKERRQ(ierr);
4256 #if 0
4257   {
4258     PetscViewer viewer;
4259     char filename[256];
4260     sprintf(filename,"coarse_mat.m");
4261     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4262     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4263     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4264     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4265   }
4266 #endif
4267 
4268   /* Compute coarse null space (special handling by BDDC only) */
4269   if (pcbddc->NullSpace) {
4270     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4271   }
4272 
4273   if (pcbddc->coarse_ksp) {
4274     Vec crhs,csol;
4275     PetscBool ispreonly;
4276     if (CoarseNullSpace) {
4277       if (isbddc) {
4278         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4279       } else {
4280         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4281       }
4282     }
4283     /* setup coarse ksp */
4284     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4285     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4286     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4287     /* hack */
4288     if (!csol) {
4289       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4290     }
4291     if (!crhs) {
4292       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4293     }
4294     /* Check coarse problem if in debug mode or if solving with an iterative method */
4295     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4296     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4297       KSP       check_ksp;
4298       KSPType   check_ksp_type;
4299       PC        check_pc;
4300       Vec       check_vec,coarse_vec;
4301       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4302       PetscInt  its;
4303       PetscBool compute_eigs;
4304       PetscReal *eigs_r,*eigs_c;
4305       PetscInt  neigs;
4306       const char *prefix;
4307 
4308       /* Create ksp object suitable for estimation of extreme eigenvalues */
4309       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4310       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4311       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4312       if (ispreonly) {
4313         check_ksp_type = KSPPREONLY;
4314         compute_eigs = PETSC_FALSE;
4315       } else {
4316         check_ksp_type = KSPGMRES;
4317         compute_eigs = PETSC_TRUE;
4318       }
4319       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4320       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4321       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4322       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4323       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4324       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4325       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4326       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4327       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4328       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4329       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4330       /* create random vec */
4331       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4332       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4333       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4334       if (CoarseNullSpace) {
4335         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4336       }
4337       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4338       /* solve coarse problem */
4339       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4340       if (CoarseNullSpace) {
4341         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4342       }
4343       /* set eigenvalue estimation if preonly has not been requested */
4344       if (compute_eigs) {
4345         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4346         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4347         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4348         lambda_max = eigs_r[neigs-1];
4349         lambda_min = eigs_r[0];
4350         if (pcbddc->use_coarse_estimates) {
4351           if (lambda_max>lambda_min) {
4352             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4353             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4354           }
4355         }
4356       }
4357 
4358       /* check coarse problem residual error */
4359       if (pcbddc->dbg_flag) {
4360         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4361         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4362         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4363         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4364         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4365         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4366         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4367         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4368         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4369         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4370         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4371         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4372         if (compute_eigs) {
4373           PetscReal lambda_max_s,lambda_min_s;
4374           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4375           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4376           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4377           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4378           for (i=0;i<neigs;i++) {
4379             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4380           }
4381         }
4382         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4383         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4384       }
4385       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4386       if (compute_eigs) {
4387         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4388         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4389       }
4390     }
4391   }
4392   /* print additional info */
4393   if (pcbddc->dbg_flag) {
4394     /* waits until all processes reaches this point */
4395     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4396     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4397     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4398   }
4399 
4400   /* free memory */
4401   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4402   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4403   PetscFunctionReturn(0);
4404 }
4405 
4406 #undef __FUNCT__
4407 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4408 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4409 {
4410   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4411   PC_IS*         pcis = (PC_IS*)pc->data;
4412   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4413   PetscInt       i,local_size,coarse_size=0;
4414   PetscInt       *local_primal_indices=NULL;
4415   PetscInt       *t_local_primal_indices=NULL;
4416   PetscErrorCode ierr;
4417 
4418   PetscFunctionBegin;
4419   /* Compute global number of coarse dofs */
4420   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4421     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4422   }
4423   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr);
4424   ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr);
4425   local_size = 0;
4426   for (i=0;i<pcbddc->local_primal_size_cc;i++) {
4427     PetscInt j;
4428     for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j;
4429   }
4430   ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr);
4431   if (local_size != pcbddc->local_primal_size) {
4432     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4433   }
4434 
4435   /* check numbering */
4436   if (pcbddc->dbg_flag) {
4437     PetscScalar coarsesum,*array;
4438     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4439 
4440     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4441     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4442     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4443     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4444     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4445     for (i=0;i<pcbddc->local_primal_size;i++) {
4446       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4447     }
4448     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4449     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4450     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4451     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4452     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4453     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4454     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4455     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4456     for (i=0;i<pcis->n;i++) {
4457       if (array[i] == 1.0) {
4458         set_error = PETSC_TRUE;
4459         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4460       }
4461     }
4462     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4463     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4464     for (i=0;i<pcis->n;i++) {
4465       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4466     }
4467     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4468     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4469     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4470     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4471     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4472     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4473     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4474       PetscInt *gidxs;
4475 
4476       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4477       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4478       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4479       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4480       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4481       for (i=0;i<pcbddc->local_primal_size;i++) {
4482         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4483       }
4484       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4485       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4486     }
4487     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4488     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4489   }
4490   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4491   /* get back data */
4492   *coarse_size_n = coarse_size;
4493   *local_primal_indices_n = local_primal_indices;
4494   PetscFunctionReturn(0);
4495 }
4496 
4497 #undef __FUNCT__
4498 #define __FUNCT__ "PCBDDCGlobalToLocal"
4499 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4500 {
4501   IS             localis_t;
4502   PetscInt       i,lsize,*idxs,n;
4503   PetscScalar    *vals;
4504   PetscErrorCode ierr;
4505 
4506   PetscFunctionBegin;
4507   /* get indices in local ordering exploiting local to global map */
4508   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4509   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4510   for (i=0;i<lsize;i++) vals[i] = 1.0;
4511   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4512   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4513   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4514   if (idxs) { /* multilevel guard */
4515     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4516   }
4517   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4518   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4519   ierr = PetscFree(vals);CHKERRQ(ierr);
4520   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4521   /* now compute set in local ordering */
4522   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4523   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4524   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4525   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4526   for (i=0,lsize=0;i<n;i++) {
4527     if (PetscRealPart(vals[i]) > 0.5) {
4528       lsize++;
4529     }
4530   }
4531   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4532   for (i=0,lsize=0;i<n;i++) {
4533     if (PetscRealPart(vals[i]) > 0.5) {
4534       idxs[lsize++] = i;
4535     }
4536   }
4537   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4538   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4539   *localis = localis_t;
4540   PetscFunctionReturn(0);
4541 }
4542 
4543 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4544 #undef __FUNCT__
4545 #define __FUNCT__ "PCBDDCMatMult_Private"
4546 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4547 {
4548   PCBDDCChange_ctx change_ctx;
4549   PetscErrorCode   ierr;
4550 
4551   PetscFunctionBegin;
4552   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4553   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4554   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4555   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4556   PetscFunctionReturn(0);
4557 }
4558 
4559 #undef __FUNCT__
4560 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4561 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4562 {
4563   PCBDDCChange_ctx change_ctx;
4564   PetscErrorCode   ierr;
4565 
4566   PetscFunctionBegin;
4567   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4568   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4569   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4570   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4571   PetscFunctionReturn(0);
4572 }
4573 
4574 #undef __FUNCT__
4575 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4576 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4577 {
4578   PC_IS               *pcis=(PC_IS*)pc->data;
4579   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4580   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4581   Mat                 S_j;
4582   PetscInt            *used_xadj,*used_adjncy;
4583   PetscBool           free_used_adj;
4584   PetscErrorCode      ierr;
4585 
4586   PetscFunctionBegin;
4587   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4588   free_used_adj = PETSC_FALSE;
4589   if (pcbddc->sub_schurs_layers == -1) {
4590     used_xadj = NULL;
4591     used_adjncy = NULL;
4592   } else {
4593     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4594       used_xadj = pcbddc->mat_graph->xadj;
4595       used_adjncy = pcbddc->mat_graph->adjncy;
4596     } else if (pcbddc->computed_rowadj) {
4597       used_xadj = pcbddc->mat_graph->xadj;
4598       used_adjncy = pcbddc->mat_graph->adjncy;
4599     } else {
4600       PetscBool      flg_row=PETSC_FALSE;
4601       const PetscInt *xadj,*adjncy;
4602       PetscInt       nvtxs;
4603 
4604       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4605       if (flg_row) {
4606         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4607         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4608         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4609         free_used_adj = PETSC_TRUE;
4610       } else {
4611         pcbddc->sub_schurs_layers = -1;
4612         used_xadj = NULL;
4613         used_adjncy = NULL;
4614       }
4615       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4616     }
4617   }
4618 
4619   /* setup sub_schurs data */
4620   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4621   if (!sub_schurs->use_mumps) {
4622     /* pcbddc->ksp_D up to date only if not using MUMPS */
4623     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4624     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4625   } else {
4626     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4627   }
4628   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4629 
4630   /* free adjacency */
4631   if (free_used_adj) {
4632     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4633   }
4634   PetscFunctionReturn(0);
4635 }
4636 
4637 #undef __FUNCT__
4638 #define __FUNCT__ "PCBDDCInitSubSchurs"
4639 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4640 {
4641   PC_IS               *pcis=(PC_IS*)pc->data;
4642   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4643   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4644   PCBDDCGraph         graph;
4645   PetscErrorCode      ierr;
4646 
4647   PetscFunctionBegin;
4648   /* attach interface graph for determining subsets */
4649   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4650     IS verticesIS;
4651 
4652     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4653     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4654     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4655     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4656     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4657     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4658 /*
4659     if (pcbddc->dbg_flag) {
4660       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4661     }
4662 */
4663   } else {
4664     graph = pcbddc->mat_graph;
4665   }
4666 
4667   /* sub_schurs init */
4668   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4669 
4670   /* free graph struct */
4671   if (pcbddc->sub_schurs_rebuild) {
4672     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4673   }
4674   PetscFunctionReturn(0);
4675 }
4676