xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 9162d60631d455ea36a095e99b37cafd8695ec5e)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size;
76       cum2 += subset_size*n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */
112     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125                       nv+cum,&pcbddc->adaptive_constraints_idxs,
126                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
127   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129   maxneigs = 0;
130   cum = cum2 = cumarray = 0;
131   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132   pcbddc->adaptive_constraints_data_ptr[0] = 0;
133   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
134     const PetscInt *idxs;
135 
136     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
137     for (cum=0;cum<nv;cum++) {
138       pcbddc->adaptive_constraints_n[cum] = 1;
139       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140       pcbddc->adaptive_constraints_data[cum] = 1.0;
141       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143     }
144     cum2 = cum;
145     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
146   }
147 
148   if (mss) { /* multilevel */
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
150     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
151   }
152 
153   for (i=0;i<sub_schurs->n_subs;i++) {
154     PetscInt j,subset_size;
155 
156     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
157     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
158       const PetscInt *idxs;
159       PetscReal      infty = PETSC_MAX_REAL;
160       PetscInt       eigs_start = 0;
161       PetscBLASInt   B_N;
162 
163       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
164       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
165         if (sub_schurs->is_hermitian) {
166           PetscInt j;
167           for (j=0;j<subset_size;j++) {
168             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
169           }
170           for (j=0;j<subset_size;j++) {
171             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
172           }
173         } else {
174           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
175           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
176         }
177       } else {
178         S = Sarray + cumarray;
179         St = Starray + cumarray;
180       }
181 
182       /* Threshold: this is an heuristic for edges */
183       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
184       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
185 
186       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
187         PetscBLASInt B_itype = 1;
188         PetscBLASInt B_IL, B_IU;
189         PetscReal    eps = -1.0; /* dlamch? */
190         PetscInt     nmin_s;
191 
192         /* ask for eigenvalues larger than thresh */
193         if (pcbddc->dbg_flag) {
194           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
195         }
196         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
197 #if defined(PETSC_USE_COMPLEX)
198         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
199 #else
200         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
201 #endif
202         ierr = PetscFPTrapPop();CHKERRQ(ierr);
203         if (B_ierr) {
204           if (B_ierr < 0 ) {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
206           } else if (B_ierr <= B_N) {
207             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
208           } else {
209             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
210           }
211         }
212 
213         if (B_neigs > nmax) {
214           if (pcbddc->dbg_flag) {
215             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
216           }
217           eigs_start = B_neigs -nmax;
218           B_neigs = nmax;
219         }
220 
221         nmin_s = PetscMin(nmin,B_N);
222         if (B_neigs < nmin_s) {
223           PetscBLASInt B_neigs2;
224 
225           B_IU = B_N - B_neigs;
226           B_IL = B_N - nmin_s + 1;
227           if (pcbddc->dbg_flag) {
228             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
229           }
230           if (sub_schurs->is_hermitian) {
231             PetscInt j;
232             for (j=0;j<subset_size;j++) {
233               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
234             }
235             for (j=0;j<subset_size;j++) {
236               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
237             }
238           } else {
239             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
240             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
241           }
242           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
243 #if defined(PETSC_USE_COMPLEX)
244           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
245 #else
246           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
247 #endif
248           ierr = PetscFPTrapPop();CHKERRQ(ierr);
249           B_neigs += B_neigs2;
250         }
251         if (B_ierr) {
252           if (B_ierr < 0 ) {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
254           } else if (B_ierr <= B_N) {
255             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
256           } else {
257             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
258           }
259         }
260         if (pcbddc->dbg_flag) {
261           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
262           for (j=0;j<B_neigs;j++) {
263             if (eigs[j] == 0.0) {
264               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
265             } else {
266               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
267             }
268           }
269         }
270       } else {
271           /* TODO */
272       }
273 
274       maxneigs = PetscMax(B_neigs,maxneigs);
275       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
276       if (B_neigs) {
277         ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
278 
279         if (pcbddc->dbg_flag > 1) {
280           PetscInt ii;
281           for (ii=0;ii<B_neigs;ii++) {
282             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
283             for (j=0;j<B_N;j++) {
284               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
285             }
286           }
287         }
288 #if 0
289         for (j=0;j<B_neigs;j++) {
290           PetscBLASInt Blas_N,Blas_one = 1.0;
291           PetscScalar norm;
292           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
293           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
294                                                      &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
295           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
296             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
297           } else {
298             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
299           }
300           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
301         }
302 #endif
303         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
304         pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
305         pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
306         cum++;
307       }
308       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
309     }
310     /* shift for next computation */
311     cumarray += subset_size*subset_size;
312   }
313   if (pcbddc->dbg_flag) {
314     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
315   }
316 
317   if (mss) {
318     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
319     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
320     /* destroy matrices (junk) */
321     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
322     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
323   }
324   if (allocated_S_St) {
325     ierr = PetscFree2(S,St);CHKERRQ(ierr);
326   }
327   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
328 #if defined(PETSC_USE_COMPLEX)
329   ierr = PetscFree(rwork);CHKERRQ(ierr);
330 #endif
331   if (pcbddc->dbg_flag) {
332     PetscInt maxneigs_r;
333     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
334     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
335   }
336   PetscFunctionReturn(0);
337 }
338 
339 #undef __FUNCT__
340 #define __FUNCT__ "PCBDDCSetUpSolvers"
341 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
342 {
343   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
344   PetscScalar    *coarse_submat_vals;
345   PetscErrorCode ierr;
346 
347   PetscFunctionBegin;
348   /* Setup local scatters R_to_B and (optionally) R_to_D */
349   /* PCBDDCSetUpLocalWorkVectors should be called first! */
350   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
351 
352   /* Setup local neumann solver ksp_R */
353   /* PCBDDCSetUpLocalScatters should be called first! */
354   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
355 
356   /* Change global null space passed in by the user if change of basis has been requested */
357   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
358     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
359   }
360 
361   /*
362      Setup local correction and local part of coarse basis.
363      Gives back the dense local part of the coarse matrix in column major ordering
364   */
365   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
366 
367   /* Compute total number of coarse nodes and setup coarse solver */
368   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
369 
370   /* free */
371   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "PCBDDCResetCustomization"
377 PetscErrorCode PCBDDCResetCustomization(PC pc)
378 {
379   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
384   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
385   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
387   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
388   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
389   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
390   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
391   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
392   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
393   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 #undef __FUNCT__
398 #define __FUNCT__ "PCBDDCResetTopography"
399 PetscErrorCode PCBDDCResetTopography(PC pc)
400 {
401   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
402   PetscErrorCode ierr;
403 
404   PetscFunctionBegin;
405   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
406   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
407   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
408   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
409   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "PCBDDCResetSolvers"
415 PetscErrorCode PCBDDCResetSolvers(PC pc)
416 {
417   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
418   PetscScalar    *array;
419   PetscErrorCode ierr;
420 
421   PetscFunctionBegin;
422   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
423   if (pcbddc->coarse_phi_B) {
424     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
425     ierr = PetscFree(array);CHKERRQ(ierr);
426   }
427   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
428   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
429   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
430   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
431   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
432   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
433   if (pcbddc->local_auxmat2) {
434     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
435     ierr = PetscFree(array);CHKERRQ(ierr);
436   }
437   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
438   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
439   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
440   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
441   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
442   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
443   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
444   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
445   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
446   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
447   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
448   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
449   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
450   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
451   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
452   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
453   PetscFunctionReturn(0);
454 }
455 
456 #undef __FUNCT__
457 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
458 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
459 {
460   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
461   PC_IS          *pcis = (PC_IS*)pc->data;
462   VecType        impVecType;
463   PetscInt       n_constraints,n_R,old_size;
464   PetscErrorCode ierr;
465 
466   PetscFunctionBegin;
467   if (!pcbddc->ConstraintMatrix) {
468     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
469   }
470   /* get sizes */
471   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
472   n_R = pcis->n-pcbddc->n_vertices;
473   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
474   /* local work vectors (try to avoid unneeded work)*/
475   /* R nodes */
476   old_size = -1;
477   if (pcbddc->vec1_R) {
478     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
479   }
480   if (n_R != old_size) {
481     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
482     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
483     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
484     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
485     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
486     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
487   }
488   /* local primal dofs */
489   old_size = -1;
490   if (pcbddc->vec1_P) {
491     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
492   }
493   if (pcbddc->local_primal_size != old_size) {
494     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
495     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
496     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
497     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
498   }
499   /* local explicit constraints */
500   old_size = -1;
501   if (pcbddc->vec1_C) {
502     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
503   }
504   if (n_constraints && n_constraints != old_size) {
505     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
506     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
507     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
508     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
509   }
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "PCBDDCSetUpCorrection"
515 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
516 {
517   PetscErrorCode         ierr;
518   /* pointers to pcis and pcbddc */
519   PC_IS*                 pcis = (PC_IS*)pc->data;
520   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
521   /* submatrices of local problem */
522   Mat                    A_RV,A_VR,A_VV;
523   /* submatrices of local coarse problem */
524   Mat                    S_VV,S_CV,S_VC,S_CC;
525   /* working matrices */
526   Mat                    C_CR;
527   /* additional working stuff */
528   PC                     pc_R;
529   Mat                    F;
530   PetscBool              isLU,isCHOL,isILU;
531 
532   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
533   PetscScalar            *work;
534   PetscInt               *idx_V_B;
535   PetscInt               n,n_vertices,n_constraints;
536   PetscInt               i,n_R,n_D,n_B;
537   PetscBool              unsymmetric_check;
538   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
539   MatType                impMatType;
540   /* some shortcuts to scalars */
541   PetscScalar            one=1.0,m_one=-1.0;
542 
543   PetscFunctionBegin;
544   n_vertices = pcbddc->n_vertices;
545   n_constraints = pcbddc->local_primal_size-n_vertices;
546   /* Set Non-overlapping dimensions */
547   n_B = pcis->n_B;
548   n_D = pcis->n - n_B;
549   n_R = pcis->n - n_vertices;
550 
551   /* Set types for local objects needed by BDDC precondtioner */
552   impMatType = MATSEQDENSE;
553 
554   /* vertices in boundary numbering */
555   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
556   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
557   if (i != n_vertices) {
558     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
559   }
560 
561   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
562   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
567   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
568   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
569   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
570   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
571 
572   unsymmetric_check = PETSC_FALSE;
573   /* allocate workspace */
574   n = 0;
575   if (n_constraints) {
576     n += n_R*n_constraints;
577   }
578   if (n_vertices) {
579     n = PetscMax(2*n_R*n_vertices,n);
580   }
581   if (!pcbddc->issym) {
582     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
583     unsymmetric_check = PETSC_TRUE;
584   }
585   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
586 
587   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
588   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
589   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
590   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
591   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
592   if (isLU || isILU || isCHOL) {
593     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
594   } else {
595     F = NULL;
596   }
597 
598   /* Precompute stuffs needed for preprocessing and application of BDDC*/
599   if (n_constraints) {
600     Mat M1,M2,M3;
601     IS  is_aux;
602     /* see if we can save some allocations */
603     if (pcbddc->local_auxmat2) {
604       PetscInt on_R,on_constraints;
605       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
606       if (on_R != n_R || on_constraints != n_constraints) {
607         PetscScalar *marray;
608 
609         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
610         ierr = PetscFree(marray);CHKERRQ(ierr);
611         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
612         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
613       }
614     }
615     /* auxiliary matrices */
616     if (!pcbddc->local_auxmat2) {
617       PetscScalar *marray;
618 
619       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
620       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
621       marray += n_R*n_constraints;
622       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
623     }
624 
625     /* Extract constraints on R nodes: C_{CR}  */
626     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
627     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
628     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
629 
630     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
631     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
632     for (i=0;i<n_constraints;i++) {
633       const PetscScalar *row_cmat_values;
634       const PetscInt    *row_cmat_indices;
635       PetscInt          size_of_constraint,j;
636 
637       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
638       for (j=0;j<size_of_constraint;j++) {
639         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
640       }
641       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
642     }
643     if (F) {
644       Mat B;
645 
646       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
647       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
648       ierr = MatDestroy(&B);CHKERRQ(ierr);
649     } else {
650       PetscScalar *xarray;
651       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
652       for (i=0;i<n_constraints;i++) {
653         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
654         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
655         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
656         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
657         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
658       }
659       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
660     }
661 
662     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
663     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
664     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
665     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
666     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
667     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
668     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
669     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
670     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
671     ierr = MatDestroy(&M2);CHKERRQ(ierr);
672     ierr = MatDestroy(&M3);CHKERRQ(ierr);
673     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
674     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
675     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
676     ierr = MatDestroy(&M1);CHKERRQ(ierr);
677   }
678   /* Get submatrices from subdomain matrix */
679   if (n_vertices) {
680     Mat       newmat;
681     IS        is_aux;
682     PetscInt  ibs,mbs;
683     PetscBool issbaij;
684 
685     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
686     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
687     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
688     if (ibs != mbs) { /* need to convert to SEQAIJ */
689       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
690       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
691       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
692       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
693       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
694     } else {
695       /* this is safe */
696       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
697       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
698       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
699         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
700         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
701         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
702         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
703         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
704       } else {
705         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
706         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
707       }
708     }
709     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
710   }
711 
712   /* Matrix of coarse basis functions (local) */
713   if (pcbddc->coarse_phi_B) {
714     PetscInt on_B,on_primal,on_D=n_D;
715     if (pcbddc->coarse_phi_D) {
716       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
717     }
718     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
719     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
720       PetscScalar *marray;
721 
722       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
723       ierr = PetscFree(marray);CHKERRQ(ierr);
724       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
725       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
726       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
727       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
728     }
729   }
730 
731   if (!pcbddc->coarse_phi_B) {
732     PetscScalar *marray;
733 
734     n = n_B*pcbddc->local_primal_size;
735     if (pcbddc->switch_static || pcbddc->dbg_flag) {
736       n += n_D*pcbddc->local_primal_size;
737     }
738     if (!pcbddc->issym) {
739       n *= 2;
740     }
741     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
742     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
743     n = n_B*pcbddc->local_primal_size;
744     if (pcbddc->switch_static || pcbddc->dbg_flag) {
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
746       n += n_D*pcbddc->local_primal_size;
747     }
748     if (!pcbddc->issym) {
749       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
750       if (pcbddc->switch_static || pcbddc->dbg_flag) {
751         n = n_B*pcbddc->local_primal_size;
752         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
753       }
754     } else {
755       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
756       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
757       if (pcbddc->switch_static || pcbddc->dbg_flag) {
758         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
759         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
760       }
761     }
762   }
763   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
764   /* vertices */
765   if (n_vertices) {
766 
767     if (n_R) {
768       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
769       PetscBLASInt B_N,B_one = 1;
770       PetscScalar  *x,*y;
771 
772       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
773       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
774       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
775       if (F) {
776         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
777       } else {
778         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
779         for (i=0;i<n_vertices;i++) {
780           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
781           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
782           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
783           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
784           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
785         }
786         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
787       }
788       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
789       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
790       if (n_constraints) {
791         Mat B;
792         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
793         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
794         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
795         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
796         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
797         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
798         ierr = MatDestroy(&B);CHKERRQ(ierr);
799       }
800       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
801       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
802       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
803       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
804       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
805       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
806       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
807       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
808       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
809       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
810       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
811       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
812     } else {
813       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
814       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
815     }
816     /* coarse basis functions */
817     for (i=0;i<n_vertices;i++) {
818       PetscScalar *y;
819 
820       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
821       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
822       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
823       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825       y[n_B*i+idx_V_B[i]] = 1.0;
826       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
827       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
828 
829       if (pcbddc->switch_static || pcbddc->dbg_flag) {
830         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
831         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
832         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
833         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
834         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
835         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
836       }
837       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
838     }
839     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
840     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
841   }
842 
843   if (n_constraints) {
844     Mat B;
845 
846     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
847     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
848     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
849     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
850     if (n_vertices) {
851       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
852     }
853     ierr = MatDestroy(&B);CHKERRQ(ierr);
854     /* coarse basis functions */
855     for (i=0;i<n_constraints;i++) {
856       PetscScalar *y;
857 
858       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
859       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
860       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
861       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
864       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
865 
866       if (pcbddc->switch_static || pcbddc->dbg_flag) {
867         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
868         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
869         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
870         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
872         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
873       }
874       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
875     }
876   }
877 
878   /* compute other basis functions for non-symmetric problems */
879   if (!pcbddc->issym) {
880     Mat B,X;
881 
882     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
883 
884     if (n_constraints) {
885       Mat S_CCT,B_C;
886 
887       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
888       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
889       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
890       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
891       if (n_vertices) {
892         Mat B_V,S_VCT;
893 
894         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
895         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
896         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
897         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
898         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
899       }
900       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
901     }
902     if (n_vertices && n_R) {
903       Mat          A_VRT;
904       PetscBLASInt B_N,B_one = 1;
905 
906       if (!n_constraints) { /* if there are no constraints, reset work */
907         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
908       }
909       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
910       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
911       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
912       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
913       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
914     }
915 
916     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
917     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
918       for (i=0;i<pcbddc->local_primal_size;i++) {
919         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
920         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
921         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
922         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
923         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
924       }
925     } else {
926       for (i=0;i<pcbddc->local_primal_size;i++) {
927         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
928         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
929         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
930         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
931         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
932       }
933     }
934     ierr = MatDestroy(&B);CHKERRQ(ierr);
935     /* coarse basis functions */
936     for (i=0;i<pcbddc->local_primal_size;i++) {
937       PetscScalar *y;
938 
939       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
940       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
941       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
942       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
944       if (i<n_vertices) {
945         y[n_B*i+idx_V_B[i]] = 1.0;
946       }
947       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
948       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
949 
950       if (pcbddc->switch_static || pcbddc->dbg_flag) {
951         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
952         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
953         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
954         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
955         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
956         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
957       }
958       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
959     }
960     ierr = MatDestroy(&X);CHKERRQ(ierr);
961   }
962 
963   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
964   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
965   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
966   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
967   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
968   /* Checking coarse_sub_mat and coarse basis functios */
969   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
970   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
971   if (pcbddc->dbg_flag) {
972     Mat         coarse_sub_mat;
973     Mat         AUXMAT,TM1,TM2,TM3,TM4;
974     Mat         coarse_phi_D,coarse_phi_B;
975     Mat         coarse_psi_D,coarse_psi_B;
976     Mat         A_II,A_BB,A_IB,A_BI;
977     Mat         C_B,CPHI;
978     IS          is_dummy;
979     Vec         mones;
980     MatType     checkmattype=MATSEQAIJ;
981     PetscReal   real_value;
982 
983     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
984     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
985     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
986     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
987     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
988     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
989     if (unsymmetric_check) {
990       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
991       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
992     }
993     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
994 
995     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
996     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
997     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
998     if (unsymmetric_check) {
999       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1000       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1001       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1002       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1003       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1004       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1005       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1006       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1007       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1008       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1009       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1010       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1011     } else {
1012       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1013       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1014       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1015       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1016       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1017       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1018       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1019       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1020     }
1021     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1022     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1023     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1024     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1025     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1026     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1027     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1028     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1029 
1030     /* check constraints */
1031     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1032     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1033     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1034     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1035     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1036     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1037     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1038     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1039     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1040     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1041     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1042     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1043 
1044     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1045     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1046     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1047     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1048     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1049     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1050     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1051     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1052     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1053     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1054     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1055     if (unsymmetric_check) {
1056       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1057       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1058     }
1059     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1060   }
1061 
1062   /* free memory */
1063   ierr = PetscFree(work);CHKERRQ(ierr);
1064   if (n_vertices) {
1065     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1066   }
1067   if (n_constraints) {
1068     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1069   }
1070   /* get back data */
1071   *coarse_submat_vals_n = coarse_submat_vals;
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1077 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1078 {
1079   Mat            *work_mat;
1080   IS             isrow_s,iscol_s;
1081   PetscBool      rsorted,csorted;
1082   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1083   PetscErrorCode ierr;
1084 
1085   PetscFunctionBegin;
1086   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1087   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1088   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1089   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1090 
1091   if (!rsorted) {
1092     const PetscInt *idxs;
1093     PetscInt *idxs_sorted,i;
1094 
1095     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1096     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1097     for (i=0;i<rsize;i++) {
1098       idxs_perm_r[i] = i;
1099     }
1100     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1101     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1102     for (i=0;i<rsize;i++) {
1103       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1104     }
1105     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1107   } else {
1108     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1109     isrow_s = isrow;
1110   }
1111 
1112   if (!csorted) {
1113     if (isrow == iscol) {
1114       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1115       iscol_s = isrow_s;
1116     } else {
1117       const PetscInt *idxs;
1118       PetscInt *idxs_sorted,i;
1119 
1120       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1121       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1122       for (i=0;i<csize;i++) {
1123         idxs_perm_c[i] = i;
1124       }
1125       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1126       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1127       for (i=0;i<csize;i++) {
1128         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1129       }
1130       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1131       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1132     }
1133   } else {
1134     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1135     iscol_s = iscol;
1136   }
1137 
1138   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1139 
1140   if (!rsorted || !csorted) {
1141     Mat      new_mat;
1142     IS       is_perm_r,is_perm_c;
1143 
1144     if (!rsorted) {
1145       PetscInt *idxs_r,i;
1146       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1147       for (i=0;i<rsize;i++) {
1148         idxs_r[idxs_perm_r[i]] = i;
1149       }
1150       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1151       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1152     } else {
1153       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1154     }
1155     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1156 
1157     if (!csorted) {
1158       if (isrow_s == iscol_s) {
1159         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1160         is_perm_c = is_perm_r;
1161       } else {
1162         PetscInt *idxs_c,i;
1163         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1164         for (i=0;i<csize;i++) {
1165           idxs_c[idxs_perm_c[i]] = i;
1166         }
1167         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1168         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1169       }
1170     } else {
1171       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1172     }
1173     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1174 
1175     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1176     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1177     work_mat[0] = new_mat;
1178     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1179     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1180   }
1181 
1182   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1183   *B = work_mat[0];
1184   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1185   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1186   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1192 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1193 {
1194   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1195   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1196   Mat            new_mat;
1197   IS             is_local,is_global;
1198   PetscInt       local_size;
1199   PetscBool      isseqaij;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1204   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1205   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1206   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1207   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1208   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1209   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1210 
1211   /* check */
1212   if (pcbddc->dbg_flag) {
1213     Vec       x,x_change;
1214     PetscReal error;
1215 
1216     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1217     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1218     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1219     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1220     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1221     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1222     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1223     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1225     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1226     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1227     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1228     ierr = VecDestroy(&x);CHKERRQ(ierr);
1229     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1230   }
1231 
1232   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1233   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1234   if (isseqaij) {
1235     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1236   } else {
1237     Mat work_mat;
1238     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1239     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1240     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1241   }
1242   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1243 #if !defined(PETSC_USE_COMPLEX)
1244   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1245 #endif
1246   /*
1247   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1248   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1249   */
1250   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1256 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1257 {
1258   PC_IS*         pcis = (PC_IS*)(pc->data);
1259   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1260   IS             is_aux1,is_aux2;
1261   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1262   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1263   PetscInt       vbs,bs;
1264   PetscBT        bitmask;
1265   PetscErrorCode ierr;
1266 
1267   PetscFunctionBegin;
1268   /*
1269     No need to setup local scatters if
1270       - primal space is unchanged
1271         AND
1272       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1273         AND
1274       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1275   */
1276   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1277     PetscFunctionReturn(0);
1278   }
1279   /* destroy old objects */
1280   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1281   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1282   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1283   /* Set Non-overlapping dimensions */
1284   n_B = pcis->n_B;
1285   n_D = pcis->n - n_B;
1286   n_vertices = pcbddc->n_vertices;
1287   /* create auxiliary bitmask */
1288   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1289   for (i=0;i<n_vertices;i++) {
1290     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1291   }
1292 
1293   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1294   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1295   for (i=0, n_R=0; i<pcis->n; i++) {
1296     if (!PetscBTLookup(bitmask,i)) {
1297       idx_R_local[n_R] = i;
1298       n_R++;
1299     }
1300   }
1301 
1302   /* Block code */
1303   vbs = 1;
1304   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1305   if (bs>1 && !(n_vertices%bs)) {
1306     PetscBool is_blocked = PETSC_TRUE;
1307     PetscInt  *vary;
1308     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1309     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1310     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1311     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1312     for (i=0; i<n_vertices/bs; i++) {
1313       if (vary[i]!=0 && vary[i]!=bs) {
1314         is_blocked = PETSC_FALSE;
1315         break;
1316       }
1317     }
1318     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1319       vbs = bs;
1320       for (i=0;i<n_R/vbs;i++) {
1321         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1322       }
1323     }
1324     ierr = PetscFree(vary);CHKERRQ(ierr);
1325   }
1326   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1327   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1328 
1329   /* print some info if requested */
1330   if (pcbddc->dbg_flag) {
1331     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1332     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1333     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1334     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1335     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1336     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1337     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1338   }
1339 
1340   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1341   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1342   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1343   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1344   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1345   for (i=0; i<n_D; i++) {
1346     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1347   }
1348   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1349   for (i=0, j=0; i<n_R; i++) {
1350     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1351       aux_array1[j++] = i;
1352     }
1353   }
1354   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1355   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1356   for (i=0, j=0; i<n_B; i++) {
1357     if (!PetscBTLookup(bitmask,is_indices[i])) {
1358       aux_array2[j++] = i;
1359     }
1360   }
1361   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1362   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1363   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1364   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1365   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1366 
1367   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1368     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1369     for (i=0, j=0; i<n_R; i++) {
1370       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1371         aux_array1[j++] = i;
1372       }
1373     }
1374     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1375     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1376     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1377   }
1378   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1379   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1386 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1387 {
1388   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1389   PC_IS          *pcis = (PC_IS*)pc->data;
1390   PC             pc_temp;
1391   Mat            A_RR;
1392   MatReuse       reuse;
1393   PetscScalar    m_one = -1.0;
1394   PetscReal      value;
1395   PetscInt       n_D,n_R,ibs,mbs;
1396   PetscBool      use_exact,use_exact_reduced,issbaij;
1397   PetscErrorCode ierr;
1398   /* prefixes stuff */
1399   char           dir_prefix[256],neu_prefix[256],str_level[16];
1400   size_t         len;
1401 
1402   PetscFunctionBegin;
1403 
1404   /* compute prefixes */
1405   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1406   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1407   if (!pcbddc->current_level) {
1408     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1409     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1410     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1411     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1412   } else {
1413     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1414     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1415     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1416     len -= 15; /* remove "pc_bddc_coarse_" */
1417     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1418     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1419     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1420     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1421     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1422     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1423     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1424     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1425   }
1426 
1427   /* DIRICHLET PROBLEM */
1428   if (dirichlet) {
1429     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1430     if (pcbddc->issym) {
1431       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1432     }
1433     /* Matrix for Dirichlet problem is pcis->A_II */
1434     n_D = pcis->n - pcis->n_B;
1435     if (!pcbddc->ksp_D) { /* create object if not yet build */
1436       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1437       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1438       /* default */
1439       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1440       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1441       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1442       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1443       if (issbaij) {
1444         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1445       } else {
1446         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1447       }
1448       /* Allow user's customization */
1449       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1450       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1451     }
1452     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1453     if (sub_schurs->interior_solver) {
1454       ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr);
1455     }
1456     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1457     if (!n_D) {
1458       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1459       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1460     }
1461     /* Set Up KSP for Dirichlet problem of BDDC */
1462     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1463     /* set ksp_D into pcis data */
1464     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1465     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1466     pcis->ksp_D = pcbddc->ksp_D;
1467   }
1468 
1469   /* NEUMANN PROBLEM */
1470   A_RR = 0;
1471   if (neumann) {
1472     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1473     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1474     if (pcbddc->ksp_R) { /* already created ksp */
1475       PetscInt nn_R;
1476       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1477       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1478       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1479       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1480         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1481         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1482         reuse = MAT_INITIAL_MATRIX;
1483       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1484         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1485           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1486           reuse = MAT_INITIAL_MATRIX;
1487         } else { /* safe to reuse the matrix */
1488           reuse = MAT_REUSE_MATRIX;
1489         }
1490       }
1491       /* last check */
1492       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1493         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1494         reuse = MAT_INITIAL_MATRIX;
1495       }
1496     } else { /* first time, so we need to create the matrix */
1497       reuse = MAT_INITIAL_MATRIX;
1498     }
1499     /* extract A_RR */
1500     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1501     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1502     if (ibs != mbs) {
1503       Mat newmat;
1504       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1505       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1506       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1507     } else {
1508       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1509     }
1510     if (pcbddc->issym) {
1511       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1512     }
1513     if (!pcbddc->ksp_R) { /* create object if not present */
1514       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1515       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1516       /* default */
1517       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1518       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1519       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1520       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1521       if (issbaij) {
1522         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1523       } else {
1524         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1525       }
1526       /* Allow user's customization */
1527       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1528       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1529     }
1530     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1531     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1532     if (!n_R) {
1533       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1534       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1535     }
1536     /* Set Up KSP for Neumann problem of BDDC */
1537     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1538   }
1539 
1540   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1541   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1542     if (pcbddc->dbg_flag) {
1543       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1544       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1545       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1546     }
1547     if (dirichlet) { /* Dirichlet */
1548       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1549       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1550       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1551       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1552       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1553       /* need to be adapted? */
1554       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1555       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1556       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1557       /* print info */
1558       if (pcbddc->dbg_flag) {
1559         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1560         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1561       }
1562       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1563         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1564       }
1565     }
1566     if (neumann) { /* Neumann */
1567       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1568       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1569       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1570       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1571       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1572       /* need to be adapted? */
1573       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1574       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1575       /* print info */
1576       if (pcbddc->dbg_flag) {
1577         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1578         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1579       }
1580       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1581         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1582       }
1583     }
1584   }
1585   /* free Neumann problem's matrix */
1586   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1592 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1593 {
1594   PetscErrorCode ierr;
1595   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1596 
1597   PetscFunctionBegin;
1598   if (applytranspose) {
1599     if (pcbddc->local_auxmat1) {
1600       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1601       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1602     }
1603     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1604   } else {
1605     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1606     if (pcbddc->local_auxmat1) {
1607       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1608       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1609     }
1610   }
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1615 #undef __FUNCT__
1616 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1617 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1618 {
1619   PetscErrorCode ierr;
1620   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1621   PC_IS*            pcis = (PC_IS*)  (pc->data);
1622   const PetscScalar zero = 0.0;
1623 
1624   PetscFunctionBegin;
1625   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1626   if (applytranspose) {
1627     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1628     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1629   } else {
1630     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1631     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1632   }
1633   /* start communications from local primal nodes to rhs of coarse solver */
1634   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1635   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1636   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1637 
1638   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1639   /* TODO remove null space when doing multilevel */
1640   if (pcbddc->coarse_ksp) {
1641     Vec rhs,sol;
1642 
1643     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1644     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1645     if (applytranspose) {
1646       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1647     } else {
1648       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1649     }
1650   }
1651 
1652   /* Local solution on R nodes */
1653   if (pcis->n) {
1654     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1655     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1656     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1657     if (pcbddc->switch_static) {
1658       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1659       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1660     }
1661     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1662     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1663     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1665     if (pcbddc->switch_static) {
1666       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1668     }
1669   }
1670 
1671   /* communications from coarse sol to local primal nodes */
1672   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1673   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1674 
1675   /* Sum contributions from two levels */
1676   if (applytranspose) {
1677     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1678     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1679   } else {
1680     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1681     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1682   }
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1688 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1689 {
1690   PetscErrorCode ierr;
1691   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1692   PetscScalar    *array;
1693   Vec            from,to;
1694 
1695   PetscFunctionBegin;
1696   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1697     from = pcbddc->coarse_vec;
1698     to = pcbddc->vec1_P;
1699     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1700       Vec tvec;
1701 
1702       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1703       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1704       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1705       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1706       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1707       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1708     }
1709   } else { /* from local to global -> put data in coarse right hand side */
1710     from = pcbddc->vec1_P;
1711     to = pcbddc->coarse_vec;
1712   }
1713   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1714   PetscFunctionReturn(0);
1715 }
1716 
1717 #undef __FUNCT__
1718 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1719 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1720 {
1721   PetscErrorCode ierr;
1722   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1723   PetscScalar    *array;
1724   Vec            from,to;
1725 
1726   PetscFunctionBegin;
1727   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1728     from = pcbddc->coarse_vec;
1729     to = pcbddc->vec1_P;
1730   } else { /* from local to global -> put data in coarse right hand side */
1731     from = pcbddc->vec1_P;
1732     to = pcbddc->coarse_vec;
1733   }
1734   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1735   if (smode == SCATTER_FORWARD) {
1736     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1737       Vec tvec;
1738 
1739       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1740       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1741       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1742       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1743     }
1744   } else {
1745     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1746      ierr = VecResetArray(from);CHKERRQ(ierr);
1747     }
1748   }
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 /* uncomment for testing purposes */
1753 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1754 #undef __FUNCT__
1755 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1756 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1757 {
1758   PetscErrorCode    ierr;
1759   PC_IS*            pcis = (PC_IS*)(pc->data);
1760   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1761   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1762   /* one and zero */
1763   PetscScalar       one=1.0,zero=0.0;
1764   /* space to store constraints and their local indices */
1765   PetscScalar       *constraints_data;
1766   PetscInt          *constraints_idxs,*constraints_idxs_B;
1767   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1768   PetscInt          *constraints_n;
1769   /* iterators */
1770   PetscInt          i,j,k,total_counts,total_counts_cc;
1771   /* BLAS integers */
1772   PetscBLASInt      lwork,lierr;
1773   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1774   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1775   /* reuse */
1776   PetscInt          olocal_primal_size;
1777   PetscInt          *oprimal_indices_local_idxs;
1778   /* change of basis */
1779   PetscBool         boolforchange,qr_needed;
1780   PetscBT           change_basis,qr_needed_idx;
1781   /* auxiliary stuff */
1782   PetscInt          *nnz,*is_indices;
1783   PetscInt          ncc;
1784   /* some quantities */
1785   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1786   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1787 
1788   PetscFunctionBegin;
1789   /* Destroy Mat objects computed previously */
1790   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1791   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1792   /* save info on constraints from previous setup (if any) */
1793   olocal_primal_size = pcbddc->local_primal_size;
1794   ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
1795   ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
1796   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1797 
1798   /* print some info */
1799   if (pcbddc->dbg_flag) {
1800     IS       vertices;
1801     PetscInt nv,nedges,nfaces;
1802     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1803     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1804     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1805     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1806     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1807     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1808     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1809     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1810     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1811   }
1812 
1813   if (!pcbddc->adaptive_selection) {
1814     IS           ISForVertices,*ISForFaces,*ISForEdges;
1815     MatNullSpace nearnullsp;
1816     const Vec    *nearnullvecs;
1817     Vec          *localnearnullsp;
1818     PetscScalar  *array;
1819     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1820     PetscBool    nnsp_has_cnst;
1821     /* LAPACK working arrays for SVD or POD */
1822     PetscBool    skip_lapack;
1823     PetscScalar  *work;
1824     PetscReal    *singular_vals;
1825 #if defined(PETSC_USE_COMPLEX)
1826     PetscReal    *rwork;
1827 #endif
1828 #if defined(PETSC_MISSING_LAPACK_GESVD)
1829     PetscScalar  *temp_basis,*correlation_mat;
1830 #else
1831     PetscBLASInt dummy_int=1;
1832     PetscScalar  dummy_scalar=1.;
1833 #endif
1834 
1835     /* Get index sets for faces, edges and vertices from graph */
1836     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1837     /* free unneeded index sets */
1838     if (!pcbddc->use_vertices) {
1839       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1840     }
1841     if (!pcbddc->use_edges) {
1842       for (i=0;i<n_ISForEdges;i++) {
1843         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1844       }
1845       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1846       n_ISForEdges = 0;
1847     }
1848     if (!pcbddc->use_faces) {
1849       for (i=0;i<n_ISForFaces;i++) {
1850         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1851       }
1852       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1853       n_ISForFaces = 0;
1854     }
1855 
1856 #if defined(PETSC_USE_DEBUG)
1857     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
1858        Also use_change_of_basis should be consistent among processors */
1859     if (pcbddc->NullSpace) {
1860       PetscBool tbool[2],gbool[2];
1861 
1862       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
1863         pcbddc->use_change_of_basis = PETSC_TRUE;
1864         if (!ISForEdges) {
1865           pcbddc->use_change_on_faces = PETSC_TRUE;
1866         }
1867       }
1868       tbool[0] = pcbddc->use_change_of_basis;
1869       tbool[1] = pcbddc->use_change_on_faces;
1870       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1871       pcbddc->use_change_of_basis = gbool[0];
1872       pcbddc->use_change_on_faces = gbool[1];
1873     }
1874 #endif
1875 
1876     /* check if near null space is attached to global mat */
1877     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1878     if (nearnullsp) {
1879       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1880       /* remove any stored info */
1881       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1882       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1883       /* store information for BDDC solver reuse */
1884       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1885       pcbddc->onearnullspace = nearnullsp;
1886       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1887       for (i=0;i<nnsp_size;i++) {
1888         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1889       }
1890     } else { /* if near null space is not provided BDDC uses constants by default */
1891       nnsp_size = 0;
1892       nnsp_has_cnst = PETSC_TRUE;
1893     }
1894     /* get max number of constraints on a single cc */
1895     max_constraints = nnsp_size;
1896     if (nnsp_has_cnst) max_constraints++;
1897 
1898     /*
1899          Evaluate maximum storage size needed by the procedure
1900          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
1901          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
1902          There can be multiple constraints per connected component
1903                                                                                                                                                            */
1904     n_vertices = 0;
1905     if (ISForVertices) {
1906       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1907     }
1908     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
1909     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
1910 
1911     total_counts = n_ISForFaces+n_ISForEdges;
1912     total_counts *= max_constraints;
1913     total_counts += n_vertices;
1914     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1915 
1916     total_counts = 0;
1917     max_size_of_constraint = 0;
1918     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1919       IS used_is;
1920       if (i<n_ISForEdges) {
1921         used_is = ISForEdges[i];
1922       } else {
1923         used_is = ISForFaces[i-n_ISForEdges];
1924       }
1925       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
1926       total_counts += j;
1927       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1928     }
1929     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
1930 
1931     /* get local part of global near null space vectors */
1932     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1933     for (k=0;k<nnsp_size;k++) {
1934       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1935       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1936       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1937     }
1938 
1939     /* whether or not to skip lapack calls */
1940     skip_lapack = PETSC_TRUE;
1941     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1942 
1943     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1944     if (!skip_lapack) {
1945       PetscScalar temp_work;
1946 
1947 #if defined(PETSC_MISSING_LAPACK_GESVD)
1948       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1949       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1950       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1951       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1952 #if defined(PETSC_USE_COMPLEX)
1953       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1954 #endif
1955       /* now we evaluate the optimal workspace using query with lwork=-1 */
1956       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1957       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1958       lwork = -1;
1959       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1960 #if !defined(PETSC_USE_COMPLEX)
1961       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1962 #else
1963       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1964 #endif
1965       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1966       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1967 #else /* on missing GESVD */
1968       /* SVD */
1969       PetscInt max_n,min_n;
1970       max_n = max_size_of_constraint;
1971       min_n = max_constraints;
1972       if (max_size_of_constraint < max_constraints) {
1973         min_n = max_size_of_constraint;
1974         max_n = max_constraints;
1975       }
1976       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1977 #if defined(PETSC_USE_COMPLEX)
1978       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1979 #endif
1980       /* now we evaluate the optimal workspace using query with lwork=-1 */
1981       lwork = -1;
1982       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1983       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1984       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1985       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1986 #if !defined(PETSC_USE_COMPLEX)
1987       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1988 #else
1989       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1990 #endif
1991       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1992       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1993 #endif /* on missing GESVD */
1994       /* Allocate optimal workspace */
1995       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1996       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1997     }
1998     /* Now we can loop on constraining sets */
1999     total_counts = 0;
2000     constraints_idxs_ptr[0] = 0;
2001     constraints_data_ptr[0] = 0;
2002     /* vertices */
2003     if (n_vertices) {
2004       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2005       if (nnsp_has_cnst) { /* it considers all possible vertices */
2006         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2007         for (i=0;i<n_vertices;i++) {
2008           constraints_n[total_counts] = 1;
2009           constraints_data[total_counts] = 1.0;
2010           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2011           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2012           total_counts++;
2013         }
2014       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2015         PetscBool used_vertex;
2016         for (i=0;i<n_vertices;i++) {
2017           used_vertex = PETSC_FALSE;
2018           k = 0;
2019           while (!used_vertex && k<nnsp_size) {
2020             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2021             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2022               constraints_n[total_counts] = 1;
2023               constraints_idxs[total_counts] = is_indices[i];
2024               constraints_data[total_counts] = 1.0;
2025               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2026               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2027               total_counts++;
2028               used_vertex = PETSC_TRUE;
2029             }
2030             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2031             k++;
2032           }
2033         }
2034       }
2035       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2036       n_vertices = total_counts;
2037     }
2038 
2039     /* edges and faces */
2040     total_counts_cc = total_counts;
2041     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2042       IS        used_is;
2043       PetscBool idxs_copied = PETSC_FALSE;
2044 
2045       if (ncc<n_ISForEdges) {
2046         used_is = ISForEdges[ncc];
2047         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2048       } else {
2049         used_is = ISForFaces[ncc-n_ISForEdges];
2050         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2051       }
2052       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2053 
2054       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2055       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2056       /* change of basis should not be performed on local periodic nodes */
2057       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2058       if (nnsp_has_cnst) {
2059         PetscScalar quad_value;
2060 
2061         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2062         idxs_copied = PETSC_TRUE;
2063 
2064         if (!pcbddc->use_nnsp_true) {
2065           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2066         } else {
2067           quad_value = 1.0;
2068         }
2069         for (j=0;j<size_of_constraint;j++) {
2070           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2071         }
2072         temp_constraints++;
2073         total_counts++;
2074       }
2075       for (k=0;k<nnsp_size;k++) {
2076         PetscReal real_value;
2077         PetscScalar *ptr_to_data;
2078 
2079         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2080         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2081         for (j=0;j<size_of_constraint;j++) {
2082           ptr_to_data[j] = array[is_indices[j]];
2083         }
2084         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2085         /* check if array is null on the connected component */
2086         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2087         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2088         if (real_value > 0.0) { /* keep indices and values */
2089           temp_constraints++;
2090           total_counts++;
2091           if (!idxs_copied) {
2092             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2093             idxs_copied = PETSC_TRUE;
2094           }
2095         }
2096       }
2097       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2098       valid_constraints = temp_constraints;
2099       if (!pcbddc->use_nnsp_true && temp_constraints) {
2100         if (temp_constraints == 1) { /* just normalize the constraint */
2101           PetscScalar norm,*ptr_to_data;
2102 
2103           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2104           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2105           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2106           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2107           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2108         } else { /* perform SVD */
2109           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2110           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2111 
2112 #if defined(PETSC_MISSING_LAPACK_GESVD)
2113           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2114              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2115              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2116                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2117                 from that computed using LAPACKgesvd
2118              -> This is due to a different computation of eigenvectors in LAPACKheev
2119              -> The quality of the POD-computed basis will be the same */
2120           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2121           /* Store upper triangular part of correlation matrix */
2122           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2123           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2124           for (j=0;j<temp_constraints;j++) {
2125             for (k=0;k<j+1;k++) {
2126               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2127             }
2128           }
2129           /* compute eigenvalues and eigenvectors of correlation matrix */
2130           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2131           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2132 #if !defined(PETSC_USE_COMPLEX)
2133           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2134 #else
2135           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2136 #endif
2137           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2138           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2139           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2140           j = 0;
2141           while (j < temp_constraints && singular_vals[j] < tol) j++;
2142           total_counts = total_counts-j;
2143           valid_constraints = temp_constraints-j;
2144           /* scale and copy POD basis into used quadrature memory */
2145           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2146           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2147           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2148           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2149           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2150           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2151           if (j<temp_constraints) {
2152             PetscInt ii;
2153             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2154             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2155             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2156             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2157             for (k=0;k<temp_constraints-j;k++) {
2158               for (ii=0;ii<size_of_constraint;ii++) {
2159                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2160               }
2161             }
2162           }
2163 #else  /* on missing GESVD */
2164           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2165           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2166           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2167           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2168 #if !defined(PETSC_USE_COMPLEX)
2169           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2170 #else
2171           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2172 #endif
2173           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2174           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2175           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2176           k = temp_constraints;
2177           if (k > size_of_constraint) k = size_of_constraint;
2178           j = 0;
2179           while (j < k && singular_vals[k-j-1] < tol) j++;
2180           valid_constraints = k-j;
2181           total_counts = total_counts-temp_constraints+valid_constraints;
2182 #endif /* on missing GESVD */
2183         }
2184       }
2185       /* update pointers information */
2186       if (valid_constraints) {
2187         constraints_n[total_counts_cc] = valid_constraints;
2188         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2189         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2190         total_counts_cc++;
2191         /* set change_of_basis flag */
2192         if (boolforchange) {
2193           for (j=valid_constraints;j>0;j--) {
2194             PetscBTSet(change_basis,total_counts-j);
2195           }
2196         }
2197       }
2198     }
2199     /* free workspace */
2200     if (!skip_lapack) {
2201       ierr = PetscFree(work);CHKERRQ(ierr);
2202 #if defined(PETSC_USE_COMPLEX)
2203       ierr = PetscFree(rwork);CHKERRQ(ierr);
2204 #endif
2205       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2206 #if defined(PETSC_MISSING_LAPACK_GESVD)
2207       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2208       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2209 #endif
2210     }
2211     for (k=0;k<nnsp_size;k++) {
2212       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2213     }
2214     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2215     /* free index sets of faces, edges and vertices */
2216     for (i=0;i<n_ISForFaces;i++) {
2217       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2218     }
2219     if (n_ISForFaces) {
2220       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2221     }
2222     for (i=0;i<n_ISForEdges;i++) {
2223       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2224     }
2225     if (n_ISForEdges) {
2226       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2227     }
2228     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2229   } else {
2230     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2231 
2232     total_counts = 0;
2233     n_vertices = 0;
2234     if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
2235       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2236     }
2237     max_constraints = 0;
2238     total_counts_cc = 0;
2239     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2240       total_counts += pcbddc->adaptive_constraints_n[i];
2241       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2242       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2243     }
2244     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2245     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2246     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2247     constraints_data = pcbddc->adaptive_constraints_data;
2248     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2249     total_counts_cc = 0;
2250     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2251       if (pcbddc->adaptive_constraints_n[i]) {
2252         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2253       }
2254     }
2255 #if 0
2256     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2257     for (i=0;i<total_counts_cc;i++) {
2258       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2259       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2260       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2261         printf(" %d",constraints_idxs[j]);
2262       }
2263       printf("\n");
2264       printf("number of cc: %d\n",constraints_n[i]);
2265     }
2266     for (i=0;i<n_vertices;i++) {
2267       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2268     }
2269     for (i=0;i<sub_schurs->n_subs;i++) {
2270       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2271     }
2272 #endif
2273 
2274     max_size_of_constraint = 0;
2275     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2276     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2277     /* Change of basis */
2278     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2279     if (pcbddc->use_change_of_basis) {
2280       PetscInt cum = n_vertices;
2281       for (i=0;i<sub_schurs->n_subs;i++) {
2282         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2283           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2284             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2285           }
2286         }
2287         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2288       }
2289     }
2290   }
2291   pcbddc->local_primal_size = total_counts;
2292 
2293   /* map constraints_idxs in boundary numbering */
2294   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2295   if (i != constraints_idxs_ptr[total_counts_cc]) {
2296     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2297   }
2298 
2299   /* Create constraint matrix */
2300   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2301   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2302   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2303   /* array to compute a local numbering of constraints : vertices first then constraints */
2304   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2305 
2306   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2307   total_primal_vertices=0;
2308   for (i=0;i<total_counts_cc;i++) {
2309     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2310     if (size_of_constraint == 1) {
2311       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2312     } else if (PetscBTLookup(change_basis,i)) {
2313       for (k=0;k<constraints_n[i];k++) {
2314         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2315       }
2316     }
2317   }
2318   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2319   pcbddc->n_vertices = total_primal_vertices;
2320   /* permute indices in order to have a sorted set of vertices */
2321   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2322 
2323   /* determine if a QR strategy is needed for change of basis */
2324   qr_needed = PETSC_FALSE;
2325   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2326   for (i=n_vertices;i<total_counts_cc;i++) {
2327     if (PetscBTLookup(change_basis,i)) {
2328       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2329         PetscBTSet(qr_needed_idx,i);
2330         qr_needed = PETSC_TRUE;
2331       }
2332     }
2333   }
2334 
2335   /* get reference dof for local constraints */
2336   total_counts = total_primal_vertices;
2337   for (i=n_vertices;i<total_counts_cc;i++) {
2338     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2339     if (!PetscBTLookup(change_basis,i)) {
2340       for (k=0;k<constraints_n[i];k++) {
2341         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2342       }
2343     }
2344   }
2345 
2346   /* nonzero structure of constraint matrix */
2347   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2348   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2349   j = total_primal_vertices;
2350   for (i=n_vertices;i<total_counts_cc;i++) {
2351     if (!PetscBTLookup(change_basis,i)) {
2352       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2353       for (k=0;k<constraints_n[i];k++) nnz[j+k] = size_of_constraint;
2354       j += constraints_n[i];
2355     }
2356   }
2357   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2358   ierr = PetscFree(nnz);CHKERRQ(ierr);
2359 
2360   /* set values in constraint matrix */
2361   for (i=0;i<total_primal_vertices;i++) {
2362     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2363   }
2364   total_counts = total_primal_vertices;
2365   for (i=n_vertices;i<total_counts_cc;i++) {
2366     if (!PetscBTLookup(change_basis,i)) {
2367       PetscInt *cols;
2368 
2369       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2370       cols = constraints_idxs+constraints_idxs_ptr[i];
2371       for (k=0;k<constraints_n[i];k++) {
2372         PetscInt    row = total_counts+k;
2373         PetscScalar *vals;
2374 
2375         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2376         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2377       }
2378       total_counts += constraints_n[i];
2379     }
2380   }
2381   /* assembling */
2382   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2383   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2384 
2385   /*
2386   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2387   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2388   */
2389   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2390   if (pcbddc->use_change_of_basis) {
2391     /* dual and primal dofs on a single cc */
2392     PetscInt     dual_dofs,primal_dofs;
2393     /* working stuff for GEQRF */
2394     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2395     PetscBLASInt lqr_work;
2396     /* working stuff for UNGQR */
2397     PetscScalar  *gqr_work,lgqr_work_t;
2398     PetscBLASInt lgqr_work;
2399     /* working stuff for TRTRS */
2400     PetscScalar  *trs_rhs;
2401     PetscBLASInt Blas_NRHS;
2402     /* pointers for values insertion into change of basis matrix */
2403     PetscInt     *start_rows,*start_cols;
2404     PetscScalar  *start_vals;
2405     /* working stuff for values insertion */
2406     PetscBT      is_primal;
2407     PetscInt     *aux_primal_numbering_B;
2408     /* matrix sizes */
2409     PetscInt     global_size,local_size;
2410     /* temporary change of basis */
2411     Mat          localChangeOfBasisMatrix;
2412     /* extra space for debugging */
2413     PetscScalar  *dbg_work;
2414 
2415     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2416     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2417     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2418     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2419     /* nonzeros for local mat */
2420     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2421     for (i=0;i<pcis->n;i++) nnz[i]=1;
2422     for (i=n_vertices;i<total_counts_cc;i++) {
2423       if (PetscBTLookup(change_basis,i)) {
2424         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2425         if (PetscBTLookup(qr_needed_idx,i)) {
2426           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2427         } else {
2428           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2429           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2430         }
2431       }
2432     }
2433     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2434     ierr = PetscFree(nnz);CHKERRQ(ierr);
2435     /* Set initial identity in the matrix */
2436     for (i=0;i<pcis->n;i++) {
2437       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2438     }
2439 
2440     if (pcbddc->dbg_flag) {
2441       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2442       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2443     }
2444 
2445 
2446     /* Now we loop on the constraints which need a change of basis */
2447     /*
2448        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2449        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2450 
2451        Basic blocks of change of basis matrix T computed by
2452 
2453           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2454 
2455             | 1        0   ...        0         s_1/S |
2456             | 0        1   ...        0         s_2/S |
2457             |              ...                        |
2458             | 0        ...            1     s_{n-1}/S |
2459             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2460 
2461             with S = \sum_{i=1}^n s_i^2
2462             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2463                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2464 
2465           - QR decomposition of constraints otherwise
2466     */
2467     if (qr_needed) {
2468       /* space to store Q */
2469       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2470       /* first we issue queries for optimal work */
2471       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2472       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2473       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2474       lqr_work = -1;
2475       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2476       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2477       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2478       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2479       lgqr_work = -1;
2480       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2481       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2482       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2483       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2484       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2485       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2486       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2487       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2488       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2489       /* array to store scaling factors for reflectors */
2490       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2491       /* array to store rhs and solution of triangular solver */
2492       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2493       /* allocating workspace for check */
2494       if (pcbddc->dbg_flag) {
2495         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2496       }
2497     }
2498     /* array to store whether a node is primal or not */
2499     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2500     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2501     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->primal_indices_local_idxs,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2502     if (i != total_primal_vertices) {
2503       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2504     }
2505     for (i=0;i<total_primal_vertices;i++) {
2506       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2507     }
2508     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2509 
2510     /* loop on constraints and see whether or not they need a change of basis and compute it */
2511     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2512       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2513       if (PetscBTLookup(change_basis,total_counts)) {
2514         /* get constraint info */
2515         primal_dofs = constraints_n[total_counts];
2516         dual_dofs = size_of_constraint-primal_dofs;
2517 
2518         if (pcbddc->dbg_flag) {
2519           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2520         }
2521 
2522         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2523 
2524           /* copy quadrature constraints for change of basis check */
2525           if (pcbddc->dbg_flag) {
2526             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2527           }
2528           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2529           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2530 
2531           /* compute QR decomposition of constraints */
2532           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2533           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2534           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2535           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2536           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2537           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2538           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2539 
2540           /* explictly compute R^-T */
2541           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2542           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2543           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2544           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2545           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2546           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2547           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2548           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2549           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2550           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2551 
2552           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2553           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2554           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2555           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2556           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2557           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2558           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2559           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2560           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2561 
2562           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2563              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2564              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2565           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2566           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2567           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2568           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2569           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2570           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2571           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2572           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2573           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2574           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2575 
2576           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2577           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2578           /* insert cols for primal dofs */
2579           for (j=0;j<primal_dofs;j++) {
2580             start_vals = &qr_basis[j*size_of_constraint];
2581             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2582             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2583           }
2584           /* insert cols for dual dofs */
2585           for (j=0,k=0;j<dual_dofs;k++) {
2586             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2587               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2588               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2589               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2590               j++;
2591             }
2592           }
2593 
2594           /* check change of basis */
2595           if (pcbddc->dbg_flag) {
2596             PetscInt   ii,jj;
2597             PetscBool valid_qr=PETSC_TRUE;
2598             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2599             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2600             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2601             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2602             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2603             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2604             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2605             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2606             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2607             for (jj=0;jj<size_of_constraint;jj++) {
2608               for (ii=0;ii<primal_dofs;ii++) {
2609                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2610                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2611               }
2612             }
2613             if (!valid_qr) {
2614               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2615               for (jj=0;jj<size_of_constraint;jj++) {
2616                 for (ii=0;ii<primal_dofs;ii++) {
2617                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2618                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2619                   }
2620                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2621                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2622                   }
2623                 }
2624               }
2625             } else {
2626               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2627             }
2628           }
2629         } else { /* simple transformation block */
2630           PetscInt    row,col;
2631           PetscScalar val,norm;
2632 
2633           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2634           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2635           for (j=0;j<size_of_constraint;j++) {
2636             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2637             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2638             if (!PetscBTLookup(is_primal,row_B)) {
2639               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2640               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2641               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2642             } else {
2643               for (k=0;k<size_of_constraint;k++) {
2644                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2645                 if (row != col) {
2646                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2647                 } else {
2648                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2649                 }
2650                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2651               }
2652             }
2653           }
2654           if (pcbddc->dbg_flag) {
2655             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2656           }
2657         }
2658       } else {
2659         if (pcbddc->dbg_flag) {
2660           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2661         }
2662       }
2663     }
2664 
2665     /* free workspace */
2666     if (qr_needed) {
2667       if (pcbddc->dbg_flag) {
2668         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2669       }
2670       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2671       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2672       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2673       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2674       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2675     }
2676     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2677     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2678     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2679 
2680     /* assembling of global change of variable */
2681     {
2682       Mat      tmat;
2683       PetscInt bs;
2684 
2685       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2686       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2687       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2688       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2689       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2690       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2691       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2692       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2693       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2694       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2695       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2696       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2697       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2698       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2699       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2700       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2701       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2702       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2703     }
2704     /* check */
2705     if (pcbddc->dbg_flag) {
2706       PetscReal error;
2707       Vec       x,x_change;
2708 
2709       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2710       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2711       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2712       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2713       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2714       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2715       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2716       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2717       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2718       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2719       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2720       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2721       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2722       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2723       ierr = VecDestroy(&x);CHKERRQ(ierr);
2724       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2725     }
2726 
2727     /* adapt sub_schurs computed (if any) */
2728     if (pcbddc->use_deluxe_scaling) {
2729       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2730       if (sub_schurs->S_Ej_all) {
2731         Mat S_new,tmat;
2732         IS is_all_N;
2733 
2734         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2735         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2736         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2737         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2738         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2739         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2740         sub_schurs->S_Ej_all = S_new;
2741         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2742         if (sub_schurs->sum_S_Ej_all) {
2743           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2744           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2745           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2746           sub_schurs->sum_S_Ej_all = S_new;
2747           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2748         }
2749         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2750       }
2751     }
2752     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2753   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2754     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2755     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2756   }
2757 
2758   /* set up change of basis context */
2759   if (pcbddc->ChangeOfBasisMatrix) {
2760     PCBDDCChange_ctx change_ctx;
2761 
2762     if (!pcbddc->new_global_mat) {
2763       PetscInt global_size,local_size;
2764 
2765       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2766       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2767       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2768       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2769       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2770       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2771       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2772       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2773       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2774     } else {
2775       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2776       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2777       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2778     }
2779     if (!pcbddc->user_ChangeOfBasisMatrix) {
2780       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2781       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2782     } else {
2783       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2784       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2785     }
2786     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2787     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2788     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2789     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2790   }
2791 
2792   /* check if a new primal space has been introduced */
2793   pcbddc->new_primal_space_local = PETSC_TRUE;
2794   if (olocal_primal_size == pcbddc->local_primal_size) {
2795     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2796     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2797   }
2798   ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2799   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2800   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2801 
2802   /* flush dbg viewer */
2803   if (pcbddc->dbg_flag) {
2804     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2805   }
2806 
2807   /* free workspace */
2808   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2809   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2810   if (!pcbddc->adaptive_selection) {
2811     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2812     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2813   } else {
2814     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2815                       pcbddc->adaptive_constraints_idxs_ptr,
2816                       pcbddc->adaptive_constraints_data_ptr,
2817                       pcbddc->adaptive_constraints_idxs,
2818                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2819     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
2820     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
2821   }
2822   PetscFunctionReturn(0);
2823 }
2824 
2825 #undef __FUNCT__
2826 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2827 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2828 {
2829   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2830   PC_IS       *pcis = (PC_IS*)pc->data;
2831   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2832   PetscInt    ierr,i,vertex_size,N;
2833   PetscViewer viewer=pcbddc->dbg_viewer;
2834 
2835   PetscFunctionBegin;
2836   /* Reset previously computed graph */
2837   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2838   /* Init local Graph struct */
2839   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2840   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
2841 
2842   /* Check validity of the csr graph passed in by the user */
2843   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2844     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2845   }
2846 
2847   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2848   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2849     PetscInt  *xadj,*adjncy;
2850     PetscInt  nvtxs;
2851     PetscBool flg_row=PETSC_FALSE;
2852 
2853     if (pcbddc->use_local_adj) {
2854 
2855       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2856       if (flg_row) {
2857         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2858         pcbddc->computed_rowadj = PETSC_TRUE;
2859       }
2860       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2861     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
2862       IS                     is_dummy;
2863       ISLocalToGlobalMapping l2gmap_dummy;
2864       PetscInt               j,sum;
2865       PetscInt               *cxadj,*cadjncy;
2866       const PetscInt         *idxs;
2867       PCBDDCGraph            graph;
2868       PetscBT                is_on_boundary;
2869 
2870       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2871       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2872       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2873       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2874       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
2875       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2876       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2877       if (flg_row) {
2878         graph->xadj = xadj;
2879         graph->adjncy = adjncy;
2880       }
2881       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2882       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2883       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2884 
2885       if (pcbddc->dbg_flag) {
2886         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
2887         for (i=0;i<graph->ncc;i++) {
2888           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2889         }
2890       }
2891 
2892       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2893       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2894       for (i=0;i<pcis->n_B;i++) {
2895         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2896       }
2897       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2898 
2899       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2900       sum = 0;
2901       for (i=0;i<graph->ncc;i++) {
2902         PetscInt sizecc = 0;
2903         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2904           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2905             sizecc++;
2906           }
2907         }
2908         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2909           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2910             cxadj[graph->queue[j]] = sizecc;
2911           }
2912         }
2913         sum += sizecc*sizecc;
2914       }
2915       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2916       sum = 0;
2917       for (i=0;i<pcis->n;i++) {
2918         PetscInt temp = cxadj[i];
2919         cxadj[i] = sum;
2920         sum += temp;
2921       }
2922       cxadj[pcis->n] = sum;
2923       for (i=0;i<graph->ncc;i++) {
2924         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2925           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2926             PetscInt k,sizecc = 0;
2927             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2928               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2929                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2930                 sizecc++;
2931               }
2932             }
2933           }
2934         }
2935       }
2936       if (sum) {
2937         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2938       } else {
2939         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2940         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2941       }
2942       graph->xadj = 0;
2943       graph->adjncy = 0;
2944       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2945       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2946     }
2947   }
2948   if (pcbddc->dbg_flag) {
2949     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2950   }
2951 
2952   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2953   vertex_size = 1;
2954   if (pcbddc->user_provided_isfordofs) {
2955     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2956       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2957       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2958         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2959         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2960       }
2961       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2962       pcbddc->n_ISForDofs = 0;
2963       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2964     }
2965     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2966     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2967   } else {
2968     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2969       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2970       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2971       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2972         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2973       }
2974     }
2975   }
2976 
2977   /* Setup of Graph */
2978   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2979     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2980   }
2981   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2982     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2983   }
2984   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
2985 
2986   /* Graph's connected components analysis */
2987   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2988 
2989   /* print some info to stdout */
2990   if (pcbddc->dbg_flag) {
2991     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
2992   }
2993 
2994   /* mark topography has done */
2995   pcbddc->recompute_topography = PETSC_FALSE;
2996   PetscFunctionReturn(0);
2997 }
2998 
2999 #undef __FUNCT__
3000 #define __FUNCT__ "PCBDDCSubsetNumbering"
3001 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3002 {
3003   Vec            local_vec,global_vec;
3004   IS             seqis,paris;
3005   VecScatter     scatter_ctx;
3006   PetscScalar    *array;
3007   PetscInt       *temp_global_dofs;
3008   PetscScalar    globalsum;
3009   PetscInt       i,j,s;
3010   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3011   PetscMPIInt    rank_prec_comm,size_prec_comm;
3012   PetscInt       *dof_sizes,*dof_displs;
3013   PetscBool      first_found;
3014   PetscErrorCode ierr;
3015 
3016   PetscFunctionBegin;
3017   /* mpi buffers */
3018   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3019   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3020   j = ( !rank_prec_comm ? size_prec_comm : 0);
3021   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3022   /* get maximum size of subset */
3023   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3024   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3025   max_local = 0;
3026   for (i=0;i<n_local_dofs;i++) {
3027     if (max_local < temp_global_dofs[i] ) {
3028       max_local = temp_global_dofs[i];
3029     }
3030   }
3031   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3032   max_global++;
3033   max_local = 0;
3034   for (i=0;i<n_local_dofs;i++) {
3035     if (max_local < local_dofs[i] ) {
3036       max_local = local_dofs[i];
3037     }
3038   }
3039   max_local++;
3040   /* allocate workspace */
3041   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3042   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3043   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3044   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3045   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3046   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3047   /* create scatter */
3048   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3049   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3050   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3051   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3052   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3053   /* init array */
3054   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3055   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3056   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3057   if (local_dofs_mult) {
3058     for (i=0;i<n_local_dofs;i++) {
3059       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3060     }
3061   } else {
3062     for (i=0;i<n_local_dofs;i++) {
3063       array[local_dofs[i]]=1.0;
3064     }
3065   }
3066   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3067   /* scatter into global vec and get total number of global dofs */
3068   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3069   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3070   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3071   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3072   /* Fill global_vec with cumulative function for global numbering */
3073   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3074   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3075   nlocals = 0;
3076   first_index = -1;
3077   first_found = PETSC_FALSE;
3078   for (i=0;i<s;i++) {
3079     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3080       first_found = PETSC_TRUE;
3081       first_index = i;
3082     }
3083     nlocals += (PetscInt)PetscRealPart(array[i]);
3084   }
3085   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3086   if (!rank_prec_comm) {
3087     dof_displs[0]=0;
3088     for (i=1;i<size_prec_comm;i++) {
3089       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3090     }
3091   }
3092   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3093   if (first_found) {
3094     array[first_index] += (PetscScalar)nlocals;
3095     old_index = first_index;
3096     for (i=first_index+1;i<s;i++) {
3097       if (PetscRealPart(array[i]) > 0.1) {
3098         array[i] += array[old_index];
3099         old_index = i;
3100       }
3101     }
3102   }
3103   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3104   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3105   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3106   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3107   /* get global ordering of local dofs */
3108   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3109   if (local_dofs_mult) {
3110     for (i=0;i<n_local_dofs;i++) {
3111       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3112     }
3113   } else {
3114     for (i=0;i<n_local_dofs;i++) {
3115       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3116     }
3117   }
3118   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3119   /* free workspace */
3120   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3121   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3122   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3123   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3124   /* return pointer to global ordering of local dofs */
3125   *global_numbering_subset = temp_global_dofs;
3126   PetscFunctionReturn(0);
3127 }
3128 
3129 #undef __FUNCT__
3130 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3131 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3132 {
3133   PetscInt       i,j;
3134   PetscScalar    *alphas;
3135   PetscErrorCode ierr;
3136 
3137   PetscFunctionBegin;
3138   /* this implements stabilized Gram-Schmidt */
3139   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3140   for (i=0;i<n;i++) {
3141     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3142     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3143     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3144   }
3145   ierr = PetscFree(alphas);CHKERRQ(ierr);
3146   PetscFunctionReturn(0);
3147 }
3148 
3149 #undef __FUNCT__
3150 #define __FUNCT__ "MatISGetSubassemblingPattern"
3151 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3152 {
3153   Mat             subdomain_adj;
3154   IS              new_ranks,ranks_send_to;
3155   MatPartitioning partitioner;
3156   Mat_IS          *matis;
3157   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3158   PetscInt        prank;
3159   PetscMPIInt     size,rank,color;
3160   PetscInt        *xadj,*adjncy,*oldranks;
3161   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3162   PetscInt        i,local_size,threshold=0;
3163   PetscErrorCode  ierr;
3164   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3165   PetscSubcomm    subcomm;
3166 
3167   PetscFunctionBegin;
3168   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3169   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3170   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3171 
3172   /* Get info on mapping */
3173   matis = (Mat_IS*)(mat->data);
3174   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3175   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3176 
3177   /* build local CSR graph of subdomains' connectivity */
3178   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3179   xadj[0] = 0;
3180   xadj[1] = PetscMax(n_neighs-1,0);
3181   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3182   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3183 
3184   if (threshold) {
3185     PetscInt xadj_count = 0;
3186     for (i=1;i<n_neighs;i++) {
3187       if (n_shared[i] > threshold) {
3188         adjncy[xadj_count] = neighs[i];
3189         adjncy_wgt[xadj_count] = n_shared[i];
3190         xadj_count++;
3191       }
3192     }
3193     xadj[1] = xadj_count;
3194   } else {
3195     if (xadj[1]) {
3196       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3197       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3198     }
3199   }
3200   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3201   if (use_square) {
3202     for (i=0;i<xadj[1];i++) {
3203       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3204     }
3205   }
3206   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3207 
3208   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3209 
3210   /*
3211     Restrict work on active processes only.
3212   */
3213   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3214   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3215   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3216   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3217   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3218   if (color) {
3219     ierr = PetscFree(xadj);CHKERRQ(ierr);
3220     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3221     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3222   } else {
3223     PetscInt coarsening_ratio;
3224     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3225     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3226     prank = rank;
3227     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3228     /*
3229     for (i=0;i<size;i++) {
3230       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3231     }
3232     */
3233     for (i=0;i<xadj[1];i++) {
3234       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3235     }
3236     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3237     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3238     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3239 
3240     /* Partition */
3241     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3242     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3243     if (use_vwgt) {
3244       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3245       v_wgt[0] = local_size;
3246       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3247     }
3248     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3249     coarsening_ratio = size/n_subdomains;
3250     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3251     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3252     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3253     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3254 
3255     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3256     if (contiguous) {
3257       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3258     } else {
3259       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3260     }
3261     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3262     /* clean up */
3263     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3264     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3265     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3266     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3267   }
3268   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3269 
3270   /* assemble parallel IS for sends */
3271   i = 1;
3272   if (color) i=0;
3273   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3274 
3275   /* get back IS */
3276   *is_sends = ranks_send_to;
3277   PetscFunctionReturn(0);
3278 }
3279 
3280 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3281 
3282 #undef __FUNCT__
3283 #define __FUNCT__ "MatISSubassemble"
3284 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3285 {
3286   Mat                    local_mat;
3287   Mat_IS                 *matis;
3288   IS                     is_sends_internal;
3289   PetscInt               rows,cols,new_local_rows;
3290   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3291   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3292   ISLocalToGlobalMapping l2gmap;
3293   PetscInt*              l2gmap_indices;
3294   const PetscInt*        is_indices;
3295   MatType                new_local_type;
3296   /* buffers */
3297   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3298   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3299   PetscInt               *recv_buffer_idxs_local;
3300   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3301   /* MPI */
3302   MPI_Comm               comm,comm_n;
3303   PetscSubcomm           subcomm;
3304   PetscMPIInt            n_sends,n_recvs,commsize;
3305   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3306   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3307   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3308   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3309   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3310   PetscErrorCode         ierr;
3311 
3312   PetscFunctionBegin;
3313   /* TODO: add missing checks */
3314   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3315   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3316   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3317   PetscValidLogicalCollectiveInt(mat,nis,7);
3318   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3319   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3320   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3321   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3322   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3323   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3324   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3325   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3326     PetscInt mrows,mcols,mnrows,mncols;
3327     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3328     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3329     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3330     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3331     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3332     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3333   }
3334   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3335   PetscValidLogicalCollectiveInt(mat,bs,0);
3336   /* prepare IS for sending if not provided */
3337   if (!is_sends) {
3338     PetscBool pcontig = PETSC_TRUE;
3339     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3340     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3341   } else {
3342     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3343     is_sends_internal = is_sends;
3344   }
3345 
3346   /* get pointer of MATIS data */
3347   matis = (Mat_IS*)mat->data;
3348 
3349   /* get comm */
3350   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3351 
3352   /* compute number of sends */
3353   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3354   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3355 
3356   /* compute number of receives */
3357   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3358   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3359   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3360   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3361   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3362   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3363   ierr = PetscFree(iflags);CHKERRQ(ierr);
3364 
3365   /* restrict comm if requested */
3366   subcomm = 0;
3367   destroy_mat = PETSC_FALSE;
3368   if (restrict_comm) {
3369     PetscMPIInt color,subcommsize;
3370 
3371     color = 0;
3372     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3373     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3374     subcommsize = commsize - subcommsize;
3375     /* check if reuse has been requested */
3376     if (reuse == MAT_REUSE_MATRIX) {
3377       if (*mat_n) {
3378         PetscMPIInt subcommsize2;
3379         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3380         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3381         comm_n = PetscObjectComm((PetscObject)*mat_n);
3382       } else {
3383         comm_n = PETSC_COMM_SELF;
3384       }
3385     } else { /* MAT_INITIAL_MATRIX */
3386       PetscMPIInt rank;
3387 
3388       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3389       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3390       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3391       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3392       comm_n = PetscSubcommChild(subcomm);
3393     }
3394     /* flag to destroy *mat_n if not significative */
3395     if (color) destroy_mat = PETSC_TRUE;
3396   } else {
3397     comm_n = comm;
3398   }
3399 
3400   /* prepare send/receive buffers */
3401   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3402   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3403   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3404   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3405   if (nis) {
3406     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3407   }
3408 
3409   /* Get data from local matrices */
3410   if (!isdense) {
3411     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3412     /* TODO: See below some guidelines on how to prepare the local buffers */
3413     /*
3414        send_buffer_vals should contain the raw values of the local matrix
3415        send_buffer_idxs should contain:
3416        - MatType_PRIVATE type
3417        - PetscInt        size_of_l2gmap
3418        - PetscInt        global_row_indices[size_of_l2gmap]
3419        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3420     */
3421   } else {
3422     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3423     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3424     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3425     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3426     send_buffer_idxs[1] = i;
3427     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3428     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3429     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3430     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3431     for (i=0;i<n_sends;i++) {
3432       ilengths_vals[is_indices[i]] = len*len;
3433       ilengths_idxs[is_indices[i]] = len+2;
3434     }
3435   }
3436   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3437   /* additional is (if any) */
3438   if (nis) {
3439     PetscMPIInt psum;
3440     PetscInt j;
3441     for (j=0,psum=0;j<nis;j++) {
3442       PetscInt plen;
3443       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3444       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3445       psum += len+1; /* indices + lenght */
3446     }
3447     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3448     for (j=0,psum=0;j<nis;j++) {
3449       PetscInt plen;
3450       const PetscInt *is_array_idxs;
3451       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3452       send_buffer_idxs_is[psum] = plen;
3453       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3454       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3455       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3456       psum += plen+1; /* indices + lenght */
3457     }
3458     for (i=0;i<n_sends;i++) {
3459       ilengths_idxs_is[is_indices[i]] = psum;
3460     }
3461     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3462   }
3463 
3464   buf_size_idxs = 0;
3465   buf_size_vals = 0;
3466   buf_size_idxs_is = 0;
3467   for (i=0;i<n_recvs;i++) {
3468     buf_size_idxs += (PetscInt)olengths_idxs[i];
3469     buf_size_vals += (PetscInt)olengths_vals[i];
3470     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3471   }
3472   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3473   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3474   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3475 
3476   /* get new tags for clean communications */
3477   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3478   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3479   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3480 
3481   /* allocate for requests */
3482   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3483   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3484   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3485   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3486   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3487   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3488 
3489   /* communications */
3490   ptr_idxs = recv_buffer_idxs;
3491   ptr_vals = recv_buffer_vals;
3492   ptr_idxs_is = recv_buffer_idxs_is;
3493   for (i=0;i<n_recvs;i++) {
3494     source_dest = onodes[i];
3495     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3496     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3497     ptr_idxs += olengths_idxs[i];
3498     ptr_vals += olengths_vals[i];
3499     if (nis) {
3500       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3501       ptr_idxs_is += olengths_idxs_is[i];
3502     }
3503   }
3504   for (i=0;i<n_sends;i++) {
3505     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3506     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3507     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3508     if (nis) {
3509       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3510     }
3511   }
3512   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3513   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3514 
3515   /* assemble new l2g map */
3516   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3517   ptr_idxs = recv_buffer_idxs;
3518   new_local_rows = 0;
3519   for (i=0;i<n_recvs;i++) {
3520     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3521     ptr_idxs += olengths_idxs[i];
3522   }
3523   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3524   ptr_idxs = recv_buffer_idxs;
3525   new_local_rows = 0;
3526   for (i=0;i<n_recvs;i++) {
3527     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3528     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3529     ptr_idxs += olengths_idxs[i];
3530   }
3531   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3532   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3533   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3534 
3535   /* infer new local matrix type from received local matrices type */
3536   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3537   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3538   if (n_recvs) {
3539     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3540     ptr_idxs = recv_buffer_idxs;
3541     for (i=0;i<n_recvs;i++) {
3542       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3543         new_local_type_private = MATAIJ_PRIVATE;
3544         break;
3545       }
3546       ptr_idxs += olengths_idxs[i];
3547     }
3548     switch (new_local_type_private) {
3549       case MATDENSE_PRIVATE:
3550         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3551           new_local_type = MATSEQAIJ;
3552           bs = 1;
3553         } else { /* if I receive only 1 dense matrix */
3554           new_local_type = MATSEQDENSE;
3555           bs = 1;
3556         }
3557         break;
3558       case MATAIJ_PRIVATE:
3559         new_local_type = MATSEQAIJ;
3560         bs = 1;
3561         break;
3562       case MATBAIJ_PRIVATE:
3563         new_local_type = MATSEQBAIJ;
3564         break;
3565       case MATSBAIJ_PRIVATE:
3566         new_local_type = MATSEQSBAIJ;
3567         break;
3568       default:
3569         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3570         break;
3571     }
3572   } else { /* by default, new_local_type is seqdense */
3573     new_local_type = MATSEQDENSE;
3574     bs = 1;
3575   }
3576 
3577   /* create MATIS object if needed */
3578   if (reuse == MAT_INITIAL_MATRIX) {
3579     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3580     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3581   } else {
3582     /* it also destroys the local matrices */
3583     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3584   }
3585   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3586   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3587 
3588   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3589 
3590   /* Global to local map of received indices */
3591   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3592   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3593   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3594 
3595   /* restore attributes -> type of incoming data and its size */
3596   buf_size_idxs = 0;
3597   for (i=0;i<n_recvs;i++) {
3598     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3599     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3600     buf_size_idxs += (PetscInt)olengths_idxs[i];
3601   }
3602   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3603 
3604   /* set preallocation */
3605   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3606   if (!newisdense) {
3607     PetscInt *new_local_nnz=0;
3608 
3609     ptr_vals = recv_buffer_vals;
3610     ptr_idxs = recv_buffer_idxs_local;
3611     if (n_recvs) {
3612       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3613     }
3614     for (i=0;i<n_recvs;i++) {
3615       PetscInt j;
3616       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3617         for (j=0;j<*(ptr_idxs+1);j++) {
3618           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3619         }
3620       } else {
3621         /* TODO */
3622       }
3623       ptr_idxs += olengths_idxs[i];
3624     }
3625     if (new_local_nnz) {
3626       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3627       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3628       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3629       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3630       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3631       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3632     } else {
3633       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3634     }
3635     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3636   } else {
3637     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3638   }
3639 
3640   /* set values */
3641   ptr_vals = recv_buffer_vals;
3642   ptr_idxs = recv_buffer_idxs_local;
3643   for (i=0;i<n_recvs;i++) {
3644     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3645       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3646       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3647       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3648       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3649       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3650     } else {
3651       /* TODO */
3652     }
3653     ptr_idxs += olengths_idxs[i];
3654     ptr_vals += olengths_vals[i];
3655   }
3656   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3657   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3658   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3659   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3660   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3661   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3662 
3663 #if 0
3664   if (!restrict_comm) { /* check */
3665     Vec       lvec,rvec;
3666     PetscReal infty_error;
3667 
3668     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3669     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3670     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3671     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3672     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3673     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3674     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3675     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3676     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3677   }
3678 #endif
3679 
3680   /* assemble new additional is (if any) */
3681   if (nis) {
3682     PetscInt **temp_idxs,*count_is,j,psum;
3683 
3684     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3685     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3686     ptr_idxs = recv_buffer_idxs_is;
3687     psum = 0;
3688     for (i=0;i<n_recvs;i++) {
3689       for (j=0;j<nis;j++) {
3690         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3691         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3692         psum += plen;
3693         ptr_idxs += plen+1; /* shift pointer to received data */
3694       }
3695     }
3696     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3697     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3698     for (i=1;i<nis;i++) {
3699       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3700     }
3701     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3702     ptr_idxs = recv_buffer_idxs_is;
3703     for (i=0;i<n_recvs;i++) {
3704       for (j=0;j<nis;j++) {
3705         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3706         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3707         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3708         ptr_idxs += plen+1; /* shift pointer to received data */
3709       }
3710     }
3711     for (i=0;i<nis;i++) {
3712       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3713       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3714       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3715     }
3716     ierr = PetscFree(count_is);CHKERRQ(ierr);
3717     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3718     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3719   }
3720   /* free workspace */
3721   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3722   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3723   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3724   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3725   if (isdense) {
3726     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3727     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3728   } else {
3729     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3730   }
3731   if (nis) {
3732     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3733     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3734   }
3735   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3736   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3737   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3738   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3739   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3740   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3741   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3742   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3743   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3744   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3745   ierr = PetscFree(onodes);CHKERRQ(ierr);
3746   if (nis) {
3747     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3748     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3749     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3750   }
3751   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3752   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3753     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3754     for (i=0;i<nis;i++) {
3755       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3756     }
3757   }
3758   PetscFunctionReturn(0);
3759 }
3760 
3761 /* temporary hack into ksp private data structure */
3762 #include <petsc/private/kspimpl.h>
3763 
3764 #undef __FUNCT__
3765 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3766 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3767 {
3768   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3769   PC_IS                  *pcis = (PC_IS*)pc->data;
3770   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3771   MatNullSpace           CoarseNullSpace=NULL;
3772   ISLocalToGlobalMapping coarse_islg;
3773   IS                     coarse_is,*isarray;
3774   PetscInt               i,im_active=-1,active_procs=-1;
3775   PetscInt               nis,nisdofs,nisneu;
3776   PC                     pc_temp;
3777   PCType                 coarse_pc_type;
3778   KSPType                coarse_ksp_type;
3779   PetscBool              multilevel_requested,multilevel_allowed;
3780   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3781   Mat                    t_coarse_mat_is;
3782   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3783   PetscMPIInt            all_procs;
3784   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3785   PetscBool              compute_vecs = PETSC_FALSE;
3786   PetscScalar            *array;
3787   PetscErrorCode         ierr;
3788 
3789   PetscFunctionBegin;
3790   /* Assign global numbering to coarse dofs */
3791   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3792     PetscInt ocoarse_size;
3793     compute_vecs = PETSC_TRUE;
3794     ocoarse_size = pcbddc->coarse_size;
3795     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3796     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3797     /* see if we can avoid some work */
3798     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3799       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3800         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3801         coarse_reuse = PETSC_FALSE;
3802       } else { /* we can safely reuse already computed coarse matrix */
3803         coarse_reuse = PETSC_TRUE;
3804       }
3805     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3806       coarse_reuse = PETSC_FALSE;
3807     }
3808     /* reset any subassembling information */
3809     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3810     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3811   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3812     coarse_reuse = PETSC_TRUE;
3813   }
3814 
3815   /* count "active" (i.e. with positive local size) and "void" processes */
3816   im_active = !!(pcis->n);
3817   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3818   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3819   void_procs = all_procs-active_procs;
3820   csin_type_simple = PETSC_TRUE;
3821   redist = PETSC_FALSE;
3822   if (pcbddc->current_level && void_procs) {
3823     csin_ml = PETSC_TRUE;
3824     ncoarse_ml = void_procs;
3825     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3826     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3827       csin_ds = PETSC_TRUE;
3828       ncoarse_ds = pcbddc->redistribute_coarse;
3829       redist = PETSC_TRUE;
3830     } else {
3831       csin_ds = PETSC_TRUE;
3832       ncoarse_ds = active_procs;
3833       redist = PETSC_TRUE;
3834     }
3835   } else {
3836     csin_ml = PETSC_FALSE;
3837     ncoarse_ml = all_procs;
3838     if (void_procs) {
3839       csin_ds = PETSC_TRUE;
3840       ncoarse_ds = void_procs;
3841       csin_type_simple = PETSC_FALSE;
3842     } else {
3843       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3844         csin_ds = PETSC_TRUE;
3845         ncoarse_ds = pcbddc->redistribute_coarse;
3846         redist = PETSC_TRUE;
3847       } else {
3848         csin_ds = PETSC_FALSE;
3849         ncoarse_ds = all_procs;
3850       }
3851     }
3852   }
3853 
3854   /*
3855     test if we can go multilevel: three conditions must be satisfied:
3856     - we have not exceeded the number of levels requested
3857     - we can actually subassemble the active processes
3858     - we can find a suitable number of MPI processes where we can place the subassembled problem
3859   */
3860   multilevel_allowed = PETSC_FALSE;
3861   multilevel_requested = PETSC_FALSE;
3862   if (pcbddc->current_level < pcbddc->max_levels) {
3863     multilevel_requested = PETSC_TRUE;
3864     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3865       multilevel_allowed = PETSC_FALSE;
3866     } else {
3867       multilevel_allowed = PETSC_TRUE;
3868     }
3869   }
3870   /* determine number of process partecipating to coarse solver */
3871   if (multilevel_allowed) {
3872     ncoarse = ncoarse_ml;
3873     csin = csin_ml;
3874     redist = PETSC_FALSE;
3875   } else {
3876     ncoarse = ncoarse_ds;
3877     csin = csin_ds;
3878   }
3879 
3880   /* creates temporary l2gmap and IS for coarse indexes */
3881   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3882   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3883 
3884   /* creates temporary MATIS object for coarse matrix */
3885   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3886   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3887   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3888   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3889 #if 0
3890   {
3891     PetscViewer viewer;
3892     char filename[256];
3893     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3894     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3895     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3896     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3897     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3898   }
3899 #endif
3900   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3901   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3902   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3903   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3904   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3905 
3906   /* compute dofs splitting and neumann boundaries for coarse dofs */
3907   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3908     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3909     const PetscInt         *idxs;
3910     ISLocalToGlobalMapping tmap;
3911 
3912     /* create map between primal indices (in local representative ordering) and local primal numbering */
3913     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3914     /* allocate space for temporary storage */
3915     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3916     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3917     /* allocate for IS array */
3918     nisdofs = pcbddc->n_ISForDofsLocal;
3919     nisneu = !!pcbddc->NeumannBoundariesLocal;
3920     nis = nisdofs + nisneu;
3921     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3922     /* dofs splitting */
3923     for (i=0;i<nisdofs;i++) {
3924       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3925       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3926       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3927       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3928       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3929       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3930       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3931       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3932     }
3933     /* neumann boundaries */
3934     if (pcbddc->NeumannBoundariesLocal) {
3935       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3936       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3937       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3938       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3939       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3940       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3941       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3942       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3943     }
3944     /* free memory */
3945     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3946     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3947     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3948   } else {
3949     nis = 0;
3950     nisdofs = 0;
3951     nisneu = 0;
3952     isarray = NULL;
3953   }
3954   /* destroy no longer needed map */
3955   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3956 
3957   /* restrict on coarse candidates (if needed) */
3958   coarse_mat_is = NULL;
3959   if (csin) {
3960     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3961       if (redist) {
3962         PetscMPIInt rank;
3963         PetscInt    spc,n_spc_p1,dest[1],destsize;
3964 
3965         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3966         spc = active_procs/ncoarse;
3967         n_spc_p1 = active_procs%ncoarse;
3968         if (im_active) {
3969           destsize = 1;
3970           if (rank > n_spc_p1*(spc+1)-1) {
3971             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3972           } else {
3973             dest[0] = rank/(spc+1);
3974           }
3975         } else {
3976           destsize = 0;
3977         }
3978         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3979       } else if (csin_type_simple) {
3980         PetscMPIInt rank;
3981         PetscInt    issize,isidx;
3982 
3983         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3984         if (im_active) {
3985           issize = 1;
3986           isidx = (PetscInt)rank;
3987         } else {
3988           issize = 0;
3989           isidx = -1;
3990         }
3991         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3992       } else { /* get a suitable subassembling pattern from MATIS code */
3993         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3994       }
3995 
3996       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
3997       if (!redist || ncoarse <= void_procs) {
3998         PetscInt ncoarse_cand,tissize,*nisindices;
3999         PetscInt *coarse_candidates;
4000         const PetscInt* tisindices;
4001 
4002         /* get coarse candidates' ranks in pc communicator */
4003         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4004         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4005         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4006           if (!coarse_candidates[i]) {
4007             coarse_candidates[ncoarse_cand++]=i;
4008           }
4009         }
4010         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4011 
4012 
4013         if (pcbddc->dbg_flag) {
4014           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4015           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4016           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4017           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4018           for (i=0;i<ncoarse_cand;i++) {
4019             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4020           }
4021           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4022           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4023         }
4024         /* shift the pattern on coarse candidates */
4025         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4026         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4027         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4028         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4029         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4030         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4031         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4032       }
4033       if (pcbddc->dbg_flag) {
4034         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4035         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4036         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4037         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4038       }
4039     }
4040     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4041     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4042   } else {
4043     if (pcbddc->dbg_flag) {
4044       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4045       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4046       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4047     }
4048     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4049     coarse_mat_is = t_coarse_mat_is;
4050   }
4051 
4052   /* create local to global scatters for coarse problem */
4053   if (compute_vecs) {
4054     PetscInt lrows;
4055     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4056     if (coarse_mat_is) {
4057       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4058     } else {
4059       lrows = 0;
4060     }
4061     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4062     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4063     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4064     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4065     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4066   }
4067   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4068   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4069 
4070   /* set defaults for coarse KSP and PC */
4071   if (multilevel_allowed) {
4072     coarse_ksp_type = KSPRICHARDSON;
4073     coarse_pc_type = PCBDDC;
4074   } else {
4075     coarse_ksp_type = KSPPREONLY;
4076     coarse_pc_type = PCREDUNDANT;
4077   }
4078 
4079   /* print some info if requested */
4080   if (pcbddc->dbg_flag) {
4081     if (!multilevel_allowed) {
4082       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4083       if (multilevel_requested) {
4084         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4085       } else if (pcbddc->max_levels) {
4086         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4087       }
4088       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4089     }
4090   }
4091 
4092   /* create the coarse KSP object only once with defaults */
4093   if (coarse_mat_is) {
4094     MatReuse coarse_mat_reuse;
4095     PetscViewer dbg_viewer = NULL;
4096     if (pcbddc->dbg_flag) {
4097       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4098       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4099     }
4100     if (!pcbddc->coarse_ksp) {
4101       char prefix[256],str_level[16];
4102       size_t len;
4103       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4104       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4105       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4106       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4107       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4108       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4109       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4110       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4111       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4112       /* prefix */
4113       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4114       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4115       if (!pcbddc->current_level) {
4116         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4117         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4118       } else {
4119         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4120         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4121         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4122         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4123         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4124         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4125       }
4126       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4127       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4128       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4129       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4130       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4131       /* allow user customization */
4132       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4133     }
4134     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4135     if (nisdofs) {
4136       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4137       for (i=0;i<nisdofs;i++) {
4138         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4139       }
4140     }
4141     if (nisneu) {
4142       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4143       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4144     }
4145 
4146     /* get some info after set from options */
4147     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4148     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4149     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4150     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4151     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4152       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4153       isbddc = PETSC_FALSE;
4154     }
4155     if (isredundant) {
4156       KSP inner_ksp;
4157       PC inner_pc;
4158       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4159       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4160       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4161     }
4162 
4163     /* assemble coarse matrix */
4164     if (coarse_reuse) {
4165       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4166       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4167       coarse_mat_reuse = MAT_REUSE_MATRIX;
4168     } else {
4169       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4170     }
4171     if (isbddc || isnn) {
4172       if (pcbddc->coarsening_ratio > 1) {
4173         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4174           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4175           if (pcbddc->dbg_flag) {
4176             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4177             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4178             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4179             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4180           }
4181         }
4182         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4183       } else {
4184         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4185         coarse_mat = coarse_mat_is;
4186       }
4187     } else {
4188       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4189     }
4190     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4191 
4192     /* propagate symmetry info to coarse matrix */
4193     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4194     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4195 
4196     /* set operators */
4197     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4198     if (pcbddc->dbg_flag) {
4199       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4200     }
4201   } else { /* processes non partecipating to coarse solver (if any) */
4202     coarse_mat = 0;
4203   }
4204   ierr = PetscFree(isarray);CHKERRQ(ierr);
4205 #if 0
4206   {
4207     PetscViewer viewer;
4208     char filename[256];
4209     sprintf(filename,"coarse_mat.m");
4210     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4211     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4212     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4213     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4214   }
4215 #endif
4216 
4217   /* Compute coarse null space (special handling by BDDC only) */
4218   if (pcbddc->NullSpace) {
4219     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4220   }
4221 
4222   if (pcbddc->coarse_ksp) {
4223     Vec crhs,csol;
4224     PetscBool ispreonly;
4225     if (CoarseNullSpace) {
4226       if (isbddc) {
4227         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4228       } else {
4229         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4230       }
4231     }
4232     /* setup coarse ksp */
4233     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4234     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4235     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4236     /* hack */
4237     if (!csol) {
4238       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4239     }
4240     if (!crhs) {
4241       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4242     }
4243     /* Check coarse problem if in debug mode or if solving with an iterative method */
4244     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4245     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4246       KSP       check_ksp;
4247       KSPType   check_ksp_type;
4248       PC        check_pc;
4249       Vec       check_vec,coarse_vec;
4250       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4251       PetscInt  its;
4252       PetscBool compute_eigs;
4253       PetscReal *eigs_r,*eigs_c;
4254       PetscInt  neigs;
4255       const char *prefix;
4256 
4257       /* Create ksp object suitable for estimation of extreme eigenvalues */
4258       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4259       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4260       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4261       if (ispreonly) {
4262         check_ksp_type = KSPPREONLY;
4263         compute_eigs = PETSC_FALSE;
4264       } else {
4265         check_ksp_type = KSPGMRES;
4266         compute_eigs = PETSC_TRUE;
4267       }
4268       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4269       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4270       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4271       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4272       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4273       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4274       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4275       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4276       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4277       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4278       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4279       /* create random vec */
4280       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4281       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4282       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4283       if (CoarseNullSpace) {
4284         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4285       }
4286       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4287       /* solve coarse problem */
4288       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4289       if (CoarseNullSpace) {
4290         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4291       }
4292       /* set eigenvalue estimation if preonly has not been requested */
4293       if (compute_eigs) {
4294         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4295         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4296         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4297         lambda_max = eigs_r[neigs-1];
4298         lambda_min = eigs_r[0];
4299         if (pcbddc->use_coarse_estimates) {
4300           if (lambda_max>lambda_min) {
4301             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4302             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4303           }
4304         }
4305       }
4306 
4307       /* check coarse problem residual error */
4308       if (pcbddc->dbg_flag) {
4309         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4310         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4311         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4312         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4313         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4314         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4315         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4316         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4317         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4318         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4319         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4320         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4321         if (compute_eigs) {
4322           PetscReal lambda_max_s,lambda_min_s;
4323           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4324           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4325           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4326           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4327           for (i=0;i<neigs;i++) {
4328             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4329           }
4330         }
4331         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4332         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4333       }
4334       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4335       if (compute_eigs) {
4336         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4337         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4338       }
4339     }
4340   }
4341   /* print additional info */
4342   if (pcbddc->dbg_flag) {
4343     /* waits until all processes reaches this point */
4344     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4345     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4346     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4347   }
4348 
4349   /* free memory */
4350   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4351   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4352   PetscFunctionReturn(0);
4353 }
4354 
4355 #undef __FUNCT__
4356 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4357 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4358 {
4359   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4360   PC_IS*         pcis = (PC_IS*)pc->data;
4361   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4362   PetscInt       i,coarse_size=0;
4363   PetscInt       *local_primal_indices=NULL;
4364   PetscErrorCode ierr;
4365 
4366   PetscFunctionBegin;
4367   /* Compute global number of coarse dofs */
4368   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4369     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4370   }
4371   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4372 
4373   /* check numbering */
4374   if (pcbddc->dbg_flag) {
4375     PetscScalar coarsesum,*array;
4376     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4377 
4378     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4379     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4380     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4381     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4382     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4383     for (i=0;i<pcbddc->local_primal_size;i++) {
4384       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4385     }
4386     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4387     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4388     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4389     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4390     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4391     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4392     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4393     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4394     for (i=0;i<pcis->n;i++) {
4395       if (array[i] == 1.0) {
4396         set_error = PETSC_TRUE;
4397         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4398       }
4399     }
4400     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4401     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4402     for (i=0;i<pcis->n;i++) {
4403       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4404     }
4405     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4406     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4407     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4408     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4409     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4410     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4411     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4412       PetscInt *gidxs;
4413 
4414       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4415       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4416       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4417       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4418       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4419       for (i=0;i<pcbddc->local_primal_size;i++) {
4420         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4421       }
4422       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4423       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4424     }
4425     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4426     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4427   }
4428   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4429   /* get back data */
4430   *coarse_size_n = coarse_size;
4431   *local_primal_indices_n = local_primal_indices;
4432   PetscFunctionReturn(0);
4433 }
4434 
4435 #undef __FUNCT__
4436 #define __FUNCT__ "PCBDDCGlobalToLocal"
4437 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4438 {
4439   IS             localis_t;
4440   PetscInt       i,lsize,*idxs,n;
4441   PetscScalar    *vals;
4442   PetscErrorCode ierr;
4443 
4444   PetscFunctionBegin;
4445   /* get indices in local ordering exploiting local to global map */
4446   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4447   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4448   for (i=0;i<lsize;i++) vals[i] = 1.0;
4449   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4450   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4451   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4452   if (idxs) { /* multilevel guard */
4453     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4454   }
4455   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4456   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4457   ierr = PetscFree(vals);CHKERRQ(ierr);
4458   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4459   /* now compute set in local ordering */
4460   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4461   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4462   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4463   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4464   for (i=0,lsize=0;i<n;i++) {
4465     if (PetscRealPart(vals[i]) > 0.5) {
4466       lsize++;
4467     }
4468   }
4469   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4470   for (i=0,lsize=0;i<n;i++) {
4471     if (PetscRealPart(vals[i]) > 0.5) {
4472       idxs[lsize++] = i;
4473     }
4474   }
4475   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4476   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4477   *localis = localis_t;
4478   PetscFunctionReturn(0);
4479 }
4480 
4481 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4482 #undef __FUNCT__
4483 #define __FUNCT__ "PCBDDCMatMult_Private"
4484 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4485 {
4486   PCBDDCChange_ctx change_ctx;
4487   PetscErrorCode   ierr;
4488 
4489   PetscFunctionBegin;
4490   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4491   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4492   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4493   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4494   PetscFunctionReturn(0);
4495 }
4496 
4497 #undef __FUNCT__
4498 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4499 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4500 {
4501   PCBDDCChange_ctx change_ctx;
4502   PetscErrorCode   ierr;
4503 
4504   PetscFunctionBegin;
4505   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4506   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4507   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4508   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4509   PetscFunctionReturn(0);
4510 }
4511 
4512 #undef __FUNCT__
4513 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4514 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4515 {
4516   PC_IS               *pcis=(PC_IS*)pc->data;
4517   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4518   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4519   Mat                 S_j;
4520   PetscInt            *used_xadj,*used_adjncy;
4521   PetscBool           free_used_adj;
4522   PetscErrorCode      ierr;
4523 
4524   PetscFunctionBegin;
4525   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4526   free_used_adj = PETSC_FALSE;
4527   if (pcbddc->sub_schurs_layers == -1) {
4528     used_xadj = NULL;
4529     used_adjncy = NULL;
4530   } else {
4531     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4532       used_xadj = pcbddc->mat_graph->xadj;
4533       used_adjncy = pcbddc->mat_graph->adjncy;
4534     } else if (pcbddc->computed_rowadj) {
4535       used_xadj = pcbddc->mat_graph->xadj;
4536       used_adjncy = pcbddc->mat_graph->adjncy;
4537     } else {
4538       PetscBool      flg_row=PETSC_FALSE;
4539       const PetscInt *xadj,*adjncy;
4540       PetscInt       nvtxs;
4541 
4542       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4543       if (flg_row) {
4544         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4545         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4546         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4547         free_used_adj = PETSC_TRUE;
4548       } else {
4549         pcbddc->sub_schurs_layers = -1;
4550         used_xadj = NULL;
4551         used_adjncy = NULL;
4552       }
4553       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4554     }
4555   }
4556 
4557   /* setup sub_schurs data */
4558   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4559   if (!sub_schurs->use_mumps) {
4560     /* pcbddc->ksp_D up to date only if not using MUMPS */
4561     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4562     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4563   } else {
4564     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4565   }
4566   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4567 
4568   /* free adjacency */
4569   if (free_used_adj) {
4570     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4571   }
4572   PetscFunctionReturn(0);
4573 }
4574 
4575 #undef __FUNCT__
4576 #define __FUNCT__ "PCBDDCInitSubSchurs"
4577 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4578 {
4579   PC_IS               *pcis=(PC_IS*)pc->data;
4580   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4581   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4582   PCBDDCGraph         graph;
4583   PetscErrorCode      ierr;
4584 
4585   PetscFunctionBegin;
4586   /* attach interface graph for determining subsets */
4587   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4588     IS verticesIS;
4589 
4590     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4591     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4592     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4593     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4594     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4595     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4596 /*
4597     if (pcbddc->dbg_flag) {
4598       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4599     }
4600 */
4601   } else {
4602     graph = pcbddc->mat_graph;
4603   }
4604 
4605   /* sub_schurs init */
4606   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4607 
4608   /* free graph struct */
4609   if (pcbddc->sub_schurs_rebuild) {
4610     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4611   }
4612   PetscFunctionReturn(0);
4613 }
4614