xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 53892102fb9112b8e4ac1fa69e7999c2e82d7409)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size;
76       cum2 += subset_size*n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
112     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125                       nv+cum,&pcbddc->adaptive_constraints_idxs,
126                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
127   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129   maxneigs = 0;
130   cum = cum2 = cumarray = 0;
131   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132   pcbddc->adaptive_constraints_data_ptr[0] = 0;
133   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
134     const PetscInt *idxs;
135 
136     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
137     for (cum=0;cum<nv;cum++) {
138       pcbddc->adaptive_constraints_n[cum] = 1;
139       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140       pcbddc->adaptive_constraints_data[cum] = 1.0;
141       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143     }
144     cum2 = cum;
145     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
146   }
147 
148   if (mss) { /* multilevel */
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
150     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
151   }
152 
153   for (i=0;i<sub_schurs->n_subs;i++) {
154     PetscInt j,subset_size;
155 
156     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
157     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
158       const PetscInt *idxs;
159       PetscReal      infty = PETSC_MAX_REAL;
160       PetscInt       eigs_start = 0;
161       PetscBLASInt   B_N;
162 
163       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
164       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
165         if (sub_schurs->is_hermitian) {
166           PetscInt j;
167           for (j=0;j<subset_size;j++) {
168             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
169           }
170           for (j=0;j<subset_size;j++) {
171             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
172           }
173         } else {
174           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
175           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
176         }
177       } else {
178         S = Sarray + cumarray;
179         St = Starray + cumarray;
180       }
181 
182       /* Threshold: this is an heuristic for edges */
183       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
184       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
185 
186       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
187         PetscBLASInt B_itype = 1;
188         PetscBLASInt B_IL, B_IU;
189         PetscReal    eps = -1.0; /* dlamch? */
190         PetscInt     nmin_s;
191 
192         /* ask for eigenvalues larger than thresh */
193         if (pcbddc->dbg_flag) {
194           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
195         }
196         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
197 #if defined(PETSC_USE_COMPLEX)
198         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
199 #else
200         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
201 #endif
202         ierr = PetscFPTrapPop();CHKERRQ(ierr);
203         if (B_ierr) {
204           if (B_ierr < 0 ) {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
206           } else if (B_ierr <= B_N) {
207             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
208           } else {
209             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
210           }
211         }
212 
213         if (B_neigs > nmax) {
214           if (pcbddc->dbg_flag) {
215             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
216           }
217           eigs_start = B_neigs -nmax;
218           B_neigs = nmax;
219         }
220 
221         nmin_s = PetscMin(nmin,B_N);
222         if (B_neigs < nmin_s) {
223           PetscBLASInt B_neigs2;
224 
225           B_IU = B_N - B_neigs;
226           B_IL = B_N - nmin_s + 1;
227           if (pcbddc->dbg_flag) {
228             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
229           }
230           if (sub_schurs->is_hermitian) {
231             PetscInt j;
232             for (j=0;j<subset_size;j++) {
233               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
234             }
235             for (j=0;j<subset_size;j++) {
236               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
237             }
238           } else {
239             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
240             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
241           }
242           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
243 #if defined(PETSC_USE_COMPLEX)
244           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
245 #else
246           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
247 #endif
248           ierr = PetscFPTrapPop();CHKERRQ(ierr);
249           B_neigs += B_neigs2;
250         }
251         if (B_ierr) {
252           if (B_ierr < 0 ) {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
254           } else if (B_ierr <= B_N) {
255             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
256           } else {
257             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
258           }
259         }
260         if (pcbddc->dbg_flag) {
261           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
262           for (j=0;j<B_neigs;j++) {
263             if (eigs[j] == 0.0) {
264               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
265             } else {
266               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
267             }
268           }
269         }
270       } else {
271           /* TODO */
272       }
273 
274       maxneigs = PetscMax(B_neigs,maxneigs);
275       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
276       if (B_neigs) {
277         ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
278 
279         if (pcbddc->dbg_flag > 1) {
280           PetscInt ii;
281           for (ii=0;ii<B_neigs;ii++) {
282             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
283             for (j=0;j<B_N;j++) {
284               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
285             }
286           }
287         }
288 #if 0
289         for (j=0;j<B_neigs;j++) {
290           PetscBLASInt Blas_N,Blas_one = 1.0;
291           PetscScalar norm;
292           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
293           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
294                                                      &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
295           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
296             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
297           } else {
298             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
299           }
300           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
301         }
302 #endif
303         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
304         pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
305         pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
306         cum++;
307       }
308       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
309     }
310     /* shift for next computation */
311     cumarray += subset_size*subset_size;
312   }
313   if (pcbddc->dbg_flag) {
314     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
315   }
316 
317   if (mss) {
318     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
319     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
320     /* destroy matrices (junk) */
321     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
322     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
323   }
324   if (allocated_S_St) {
325     ierr = PetscFree2(S,St);CHKERRQ(ierr);
326   }
327   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
328 #if defined(PETSC_USE_COMPLEX)
329   ierr = PetscFree(rwork);CHKERRQ(ierr);
330 #endif
331   if (pcbddc->dbg_flag) {
332     PetscInt maxneigs_r;
333     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
334     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
335   }
336   PetscFunctionReturn(0);
337 }
338 
339 #undef __FUNCT__
340 #define __FUNCT__ "PCBDDCSetUpSolvers"
341 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
342 {
343   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
344   PetscScalar    *coarse_submat_vals;
345   PetscErrorCode ierr;
346 
347   PetscFunctionBegin;
348   /* Setup local scatters R_to_B and (optionally) R_to_D */
349   /* PCBDDCSetUpLocalWorkVectors should be called first! */
350   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
351 
352   /* Setup local neumann solver ksp_R */
353   /* PCBDDCSetUpLocalScatters should be called first! */
354   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
355 
356   /* Change global null space passed in by the user if change of basis has been requested */
357   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
358     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
359   }
360 
361   /*
362      Setup local correction and local part of coarse basis.
363      Gives back the dense local part of the coarse matrix in column major ordering
364   */
365   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
366 
367   /* Compute total number of coarse nodes and setup coarse solver */
368   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
369 
370   /* free */
371   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "PCBDDCResetCustomization"
377 PetscErrorCode PCBDDCResetCustomization(PC pc)
378 {
379   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
384   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
385   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
387   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
388   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
389   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
390   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
391   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
392   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
393   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 #undef __FUNCT__
398 #define __FUNCT__ "PCBDDCResetTopography"
399 PetscErrorCode PCBDDCResetTopography(PC pc)
400 {
401   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
402   PetscErrorCode ierr;
403 
404   PetscFunctionBegin;
405   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
406   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
407   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
408   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
409   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "PCBDDCResetSolvers"
415 PetscErrorCode PCBDDCResetSolvers(PC pc)
416 {
417   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
418   PetscScalar    *array;
419   PetscErrorCode ierr;
420 
421   PetscFunctionBegin;
422   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
423   if (pcbddc->coarse_phi_B) {
424     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
425     ierr = PetscFree(array);CHKERRQ(ierr);
426   }
427   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
428   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
429   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
430   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
431   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
432   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
434   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
436   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
437   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
440   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
443   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
444   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
446   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
447   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
448   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
449   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
455 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
456 {
457   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
458   PC_IS          *pcis = (PC_IS*)pc->data;
459   VecType        impVecType;
460   PetscInt       n_constraints,n_R,old_size;
461   PetscErrorCode ierr;
462 
463   PetscFunctionBegin;
464   if (!pcbddc->ConstraintMatrix) {
465     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
466   }
467   /* get sizes */
468   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
469   n_R = pcis->n-pcbddc->n_vertices;
470   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
471   /* local work vectors (try to avoid unneeded work)*/
472   /* R nodes */
473   old_size = -1;
474   if (pcbddc->vec1_R) {
475     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
476   }
477   if (n_R != old_size) {
478     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
480     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
481     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
482     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
483     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
484   }
485   /* local primal dofs */
486   old_size = -1;
487   if (pcbddc->vec1_P) {
488     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
489   }
490   if (pcbddc->local_primal_size != old_size) {
491     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
492     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
493     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
494     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
495   }
496   /* local explicit constraints */
497   old_size = -1;
498   if (pcbddc->vec1_C) {
499     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
500   }
501   if (n_constraints && n_constraints != old_size) {
502     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
503     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
504     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
505     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
506   }
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "PCBDDCSetUpCorrection"
512 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
513 {
514   PetscErrorCode  ierr;
515   /* pointers to pcis and pcbddc */
516   PC_IS*          pcis = (PC_IS*)pc->data;
517   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
518   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
519   /* submatrices of local problem */
520   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
521   /* submatrices of local coarse problem */
522   Mat             S_VV,S_CV,S_VC,S_CC;
523   /* working matrices */
524   Mat             C_CR;
525   /* additional working stuff */
526   PC              pc_R;
527   Mat             F;
528   PetscBool       isLU,isCHOL,isILU;
529 
530   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
531   PetscScalar     *work;
532   PetscInt        *idx_V_B;
533   PetscInt        n,n_vertices,n_constraints;
534   PetscInt        i,n_R,n_D,n_B;
535   PetscBool       unsymmetric_check;
536   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
537   MatType         impMatType;
538   /* some shortcuts to scalars */
539   PetscScalar     one=1.0,m_one=-1.0;
540 
541   PetscFunctionBegin;
542   n_vertices = pcbddc->n_vertices;
543   n_constraints = pcbddc->local_primal_size-n_vertices;
544   /* Set Non-overlapping dimensions */
545   n_B = pcis->n_B;
546   n_D = pcis->n - n_B;
547   n_R = pcis->n - n_vertices;
548 
549   /* Set types for local objects needed by BDDC precondtioner */
550   impMatType = MATSEQDENSE;
551 
552   /* vertices in boundary numbering */
553   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
554   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
555   if (i != n_vertices) {
556     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
557   }
558 
559   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
560   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
567   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
568   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
569 
570   unsymmetric_check = PETSC_FALSE;
571   /* allocate workspace */
572   n = 0;
573   if (n_constraints) {
574     n += n_R*n_constraints;
575   }
576   if (n_vertices) {
577     n = PetscMax(2*n_R*n_vertices,n);
578     n = PetscMax((n_R+n_B)*n_vertices,n);
579   }
580   if (!pcbddc->issym) {
581     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
582     unsymmetric_check = PETSC_TRUE;
583   }
584   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
585 
586   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
587   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
588   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
589   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
590   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
591   if (isLU || isILU || isCHOL) {
592     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
593   } else if (sub_schurs->reuse_mumps) {
594     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
595     MatFactorType type;
596 
597     F = reuse_mumps->F;
598     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
599     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
600   } else {
601     F = NULL;
602   }
603 
604   /* Precompute stuffs needed for preprocessing and application of BDDC*/
605   if (n_constraints) {
606     Mat         M1,M2,M3;
607     Mat         auxmat;
608     IS          is_aux;
609     PetscScalar *array,*array2;
610 
611     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
612     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
613 
614     /* Extract constraints on R nodes: C_{CR}  */
615     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
616     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
617     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
618 
619     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
620     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
621     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
622     for (i=0;i<n_constraints;i++) {
623       const PetscScalar *row_cmat_values;
624       const PetscInt    *row_cmat_indices;
625       PetscInt          size_of_constraint,j;
626 
627       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
628       for (j=0;j<size_of_constraint;j++) {
629         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
630       }
631       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
632     }
633     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
634     if (F) {
635       Mat B;
636 
637       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
638       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
639       ierr = MatDestroy(&B);CHKERRQ(ierr);
640     } else {
641       PetscScalar *marr;
642 
643       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
644       for (i=0;i<n_constraints;i++) {
645         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
646         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
647         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
648         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
649         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
650       }
651       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
652     }
653     if (!pcbddc->switch_static) {
654       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
655       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
656       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
657       for (i=0;i<n_constraints;i++) {
658         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
659         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
660         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
661         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
662         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
663         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
664       }
665       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
666       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
667       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
668     } else {
669       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
670       pcbddc->local_auxmat2 = local_auxmat2_R;
671       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
672     }
673     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
674     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
675     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
676     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
677     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
678     if (isCHOL) {
679       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
680     } else {
681       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
682     }
683     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
684     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
685     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
686     ierr = MatDestroy(&M2);CHKERRQ(ierr);
687     ierr = MatDestroy(&M3);CHKERRQ(ierr);
688     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
689     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
690     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
691     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
692     ierr = MatDestroy(&M1);CHKERRQ(ierr);
693   }
694   /* Get submatrices from subdomain matrix */
695   if (n_vertices) {
696     Mat       newmat;
697     IS        is_aux;
698     PetscInt  ibs,mbs;
699     PetscBool issbaij;
700 
701     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
702       IS tis;
703 
704       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
705       ierr = ISSort(tis);CHKERRQ(ierr);
706       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
707       ierr = ISDestroy(&tis);CHKERRQ(ierr);
708     } else {
709       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
710     }
711     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
712     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
713     if (ibs != mbs) { /* need to convert to SEQAIJ */
714       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
715       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
716       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
717       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
718       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
719       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
720     } else {
721       /* this is safe */
722       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
723       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
724       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
725         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
726         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
727         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
728         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
729         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
730         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
731       } else {
732         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
733         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
734         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
735       }
736       /* need to convert to SeqAIJ for MatMatMult with dense matrix */
737       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
738     }
739     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
740   }
741 
742   /* Matrix of coarse basis functions (local) */
743   if (pcbddc->coarse_phi_B) {
744     PetscInt on_B,on_primal,on_D=n_D;
745     if (pcbddc->coarse_phi_D) {
746       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
747     }
748     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
749     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
750       PetscScalar *marray;
751 
752       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
753       ierr = PetscFree(marray);CHKERRQ(ierr);
754       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
755       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
756       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
757       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
758     }
759   }
760 
761   if (!pcbddc->coarse_phi_B) {
762     PetscScalar *marray;
763 
764     n = n_B*pcbddc->local_primal_size;
765     if (pcbddc->switch_static || pcbddc->dbg_flag) {
766       n += n_D*pcbddc->local_primal_size;
767     }
768     if (!pcbddc->issym) {
769       n *= 2;
770     }
771     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
772     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
773     n = n_B*pcbddc->local_primal_size;
774     if (pcbddc->switch_static || pcbddc->dbg_flag) {
775       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
776       n += n_D*pcbddc->local_primal_size;
777     }
778     if (!pcbddc->issym) {
779       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
780       if (pcbddc->switch_static || pcbddc->dbg_flag) {
781         n = n_B*pcbddc->local_primal_size;
782         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
783       }
784     } else {
785       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
786       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
787       if (pcbddc->switch_static || pcbddc->dbg_flag) {
788         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
789         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
790       }
791     }
792   }
793   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
794   /* vertices */
795   if (n_vertices) {
796 
797     if (n_R) {
798       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
799       PetscBLASInt B_N,B_one = 1;
800       PetscScalar  *x,*y;
801 
802       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
803       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
804       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
805       if (F) { /* TODO could be optimized for symmetric problems */
806         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
807       } else {
808         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
809         for (i=0;i<n_vertices;i++) {
810           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
811           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
812           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
813           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
814           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
815         }
816         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
817       }
818       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
819       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
820       if (n_constraints) {
821         Mat B;
822 
823         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
824         for (i=0;i<n_vertices;i++) {
825           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
826           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
827           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
830           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
831         }
832         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
833         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
834         ierr = MatDestroy(&B);CHKERRQ(ierr);
835         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
836         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
837         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
838         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
839         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
840         ierr = MatDestroy(&B);CHKERRQ(ierr);
841       }
842       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
843       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
844       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
845       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
846       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
847       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
848       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
849       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
850       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
851       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
852     } else {
853       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
854     }
855     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
856     /* coarse basis functions */
857     for (i=0;i<n_vertices;i++) {
858       PetscScalar *y;
859 
860       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
861       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
862       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
863       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
864       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
865       y[n_B*i+idx_V_B[i]] = 1.0;
866       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
867       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
868 
869       if (pcbddc->switch_static || pcbddc->dbg_flag) {
870         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
871         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
872         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
875         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
876       }
877       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
878     }
879   }
880 
881   if (n_constraints) {
882     Mat B;
883 
884     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
885     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
886     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
887     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
888     if (n_vertices) {
889       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
890         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
891       } else {
892         Mat S_VCt;
893 
894         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
895         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
896         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
897       }
898     }
899     ierr = MatDestroy(&B);CHKERRQ(ierr);
900     /* coarse basis functions */
901     for (i=0;i<n_constraints;i++) {
902       PetscScalar *y;
903 
904       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
905       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
906       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
907       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
908       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
910       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
911       if (pcbddc->switch_static || pcbddc->dbg_flag) {
912         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
913         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
914         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
917         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
918       }
919       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
920     }
921   }
922   if (n_constraints) {
923     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
924   }
925 
926   /* compute other basis functions for non-symmetric problems */
927   if (!pcbddc->issym) {
928 
929     if (n_constraints) {
930       Mat S_CCT,B_C;
931 
932       /* this is a lazy thing */
933       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
934       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
935       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
936       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
937       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
938       if (n_vertices) {
939         Mat B_V,S_VCT;
940 
941         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
942         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
943         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
944         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
945         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
946       }
947       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
948     } else { /* if there are no constraints, reset work */
949       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
950     }
951     if (n_vertices && n_R) {
952       Mat          A_VRT;
953       PetscScalar  *marray;
954       PetscBLASInt B_N,B_one = 1;
955 
956       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
957       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
958       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
959       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
960       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
961       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
962       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
963     }
964 
965     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
966       for (i=0;i<pcbddc->local_primal_size;i++) {
967         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
968         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
969         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
970         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
971         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
972       }
973     } else {
974       for (i=0;i<pcbddc->local_primal_size;i++) {
975         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
976         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
977         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
978         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
979         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
980       }
981     }
982     /* coarse basis functions */
983     for (i=0;i<pcbddc->local_primal_size;i++) {
984       PetscScalar *y;
985 
986       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
987       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
988       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
989       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991       if (i<n_vertices) {
992         y[n_B*i+idx_V_B[i]] = 1.0;
993       }
994       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
995       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
996 
997       if (pcbddc->switch_static || pcbddc->dbg_flag) {
998         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
999         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1000         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1003         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1004       }
1005       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1006     }
1007   }
1008   /* free memory */
1009   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1010   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1011   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1012   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1013   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1014   ierr = PetscFree(work);CHKERRQ(ierr);
1015   if (n_vertices) {
1016     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1017   }
1018   if (n_constraints) {
1019     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1020   }
1021   /* Checking coarse_sub_mat and coarse basis functios */
1022   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1023   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1024   if (pcbddc->dbg_flag) {
1025     Mat         coarse_sub_mat;
1026     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1027     Mat         coarse_phi_D,coarse_phi_B;
1028     Mat         coarse_psi_D,coarse_psi_B;
1029     Mat         A_II,A_BB,A_IB,A_BI;
1030     Mat         C_B,CPHI;
1031     IS          is_dummy;
1032     Vec         mones;
1033     MatType     checkmattype=MATSEQAIJ;
1034     PetscReal   real_value;
1035 
1036     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1037     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1038     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1039     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1040     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1041     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1042     if (unsymmetric_check) {
1043       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1044       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1045     }
1046     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1047 
1048     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1049     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
1050     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1051     if (unsymmetric_check) {
1052       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1053       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1054       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1055       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1056       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1057       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1058       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1059       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1060       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1061       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1062       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1063       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1064     } else {
1065       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1066       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1067       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1068       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1069       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1070       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1071       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1072       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1073     }
1074     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1075     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1076     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1077     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1078     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1079     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1080     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1081     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1082 
1083     /* check constraints */
1084     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1085     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1086     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1087     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1088     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1089     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1090     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1091     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1092     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1093     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1094     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1095     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1096 
1097     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1098     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1099     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1100     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1101     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1102     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1103     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1104     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1105     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1106     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1107     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1108     if (unsymmetric_check) {
1109       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1110       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1111     }
1112     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1113   }
1114   /* get back data */
1115   *coarse_submat_vals_n = coarse_submat_vals;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1121 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1122 {
1123   Mat            *work_mat;
1124   IS             isrow_s,iscol_s;
1125   PetscBool      rsorted,csorted;
1126   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1131   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1132   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1133   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1134 
1135   if (!rsorted) {
1136     const PetscInt *idxs;
1137     PetscInt *idxs_sorted,i;
1138 
1139     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1140     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1141     for (i=0;i<rsize;i++) {
1142       idxs_perm_r[i] = i;
1143     }
1144     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1145     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1146     for (i=0;i<rsize;i++) {
1147       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1148     }
1149     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1150     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1151   } else {
1152     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1153     isrow_s = isrow;
1154   }
1155 
1156   if (!csorted) {
1157     if (isrow == iscol) {
1158       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1159       iscol_s = isrow_s;
1160     } else {
1161       const PetscInt *idxs;
1162       PetscInt *idxs_sorted,i;
1163 
1164       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1165       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1166       for (i=0;i<csize;i++) {
1167         idxs_perm_c[i] = i;
1168       }
1169       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1170       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1171       for (i=0;i<csize;i++) {
1172         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1173       }
1174       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1175       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1176     }
1177   } else {
1178     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1179     iscol_s = iscol;
1180   }
1181 
1182   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1183 
1184   if (!rsorted || !csorted) {
1185     Mat      new_mat;
1186     IS       is_perm_r,is_perm_c;
1187 
1188     if (!rsorted) {
1189       PetscInt *idxs_r,i;
1190       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1191       for (i=0;i<rsize;i++) {
1192         idxs_r[idxs_perm_r[i]] = i;
1193       }
1194       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1195       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1196     } else {
1197       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1198     }
1199     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1200 
1201     if (!csorted) {
1202       if (isrow_s == iscol_s) {
1203         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1204         is_perm_c = is_perm_r;
1205       } else {
1206         PetscInt *idxs_c,i;
1207         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1208         for (i=0;i<csize;i++) {
1209           idxs_c[idxs_perm_c[i]] = i;
1210         }
1211         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1212         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1213       }
1214     } else {
1215       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1216     }
1217     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1218 
1219     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1220     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1221     work_mat[0] = new_mat;
1222     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1223     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1224   }
1225 
1226   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1227   *B = work_mat[0];
1228   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1229   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1230   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1236 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1237 {
1238   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1239   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1240   Mat            new_mat;
1241   IS             is_local,is_global;
1242   PetscInt       local_size;
1243   PetscBool      isseqaij;
1244   PetscErrorCode ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1248   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1249   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1250   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1251   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1252   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1253   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1254 
1255   /* check */
1256   if (pcbddc->dbg_flag) {
1257     Vec       x,x_change;
1258     PetscReal error;
1259 
1260     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1261     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1262     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1263     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1266     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1267     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1268     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1269     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1270     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1271     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1272     ierr = VecDestroy(&x);CHKERRQ(ierr);
1273     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1274   }
1275 
1276   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1277   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1278   if (isseqaij) {
1279     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1280   } else {
1281     Mat work_mat;
1282     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1283     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1284     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1285   }
1286   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1287 #if !defined(PETSC_USE_COMPLEX)
1288   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1289 #endif
1290   /*
1291   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1292   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1293   */
1294   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #undef __FUNCT__
1299 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1300 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1301 {
1302   PC_IS*          pcis = (PC_IS*)(pc->data);
1303   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1304   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1305   PetscInt        *idx_R_local=NULL;
1306   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1307   PetscInt        vbs,bs;
1308   PetscBT         bitmask=NULL;
1309   PetscErrorCode  ierr;
1310 
1311   PetscFunctionBegin;
1312   /*
1313     No need to setup local scatters if
1314       - primal space is unchanged
1315         AND
1316       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1317         AND
1318       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1319   */
1320   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1321     PetscFunctionReturn(0);
1322   }
1323   /* destroy old objects */
1324   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1325   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1326   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1327   /* Set Non-overlapping dimensions */
1328   n_B = pcis->n_B;
1329   n_D = pcis->n - n_B;
1330   n_vertices = pcbddc->n_vertices;
1331 
1332   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1333 
1334   /* create auxiliary bitmask and allocate workspace */
1335   if (!sub_schurs->reuse_mumps) {
1336     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1337     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1338     for (i=0;i<n_vertices;i++) {
1339       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1340     }
1341 
1342     for (i=0, n_R=0; i<pcis->n; i++) {
1343       if (!PetscBTLookup(bitmask,i)) {
1344         idx_R_local[n_R++] = i;
1345       }
1346     }
1347   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1348     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1349 
1350     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1351     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1352   }
1353 
1354   /* Block code */
1355   vbs = 1;
1356   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1357   if (bs>1 && !(n_vertices%bs)) {
1358     PetscBool is_blocked = PETSC_TRUE;
1359     PetscInt  *vary;
1360     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1361     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1362     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1363     for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1364     for (i=0; i<n_vertices/bs; i++) {
1365       if (vary[i]!=0 && vary[i]!=bs) {
1366         is_blocked = PETSC_FALSE;
1367         break;
1368       }
1369     }
1370     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1371       vbs = bs;
1372       for (i=0;i<n_R/vbs;i++) {
1373         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1374       }
1375     }
1376     ierr = PetscFree(vary);CHKERRQ(ierr);
1377   }
1378   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1379   if (sub_schurs->reuse_mumps) {
1380     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1381 
1382     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1383     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1384     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1385     reuse_mumps->is_R = pcbddc->is_R_local;
1386   } else {
1387     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1388   }
1389 
1390   /* print some info if requested */
1391   if (pcbddc->dbg_flag) {
1392     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1393     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1394     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1395     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1396     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1397     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1398     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1399   }
1400 
1401   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1402   if (!sub_schurs->reuse_mumps) {
1403     IS       is_aux1,is_aux2;
1404     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1405 
1406     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1407     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1408     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1409     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1410     for (i=0; i<n_D; i++) {
1411       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1412     }
1413     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1414     for (i=0, j=0; i<n_R; i++) {
1415       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1416         aux_array1[j++] = i;
1417       }
1418     }
1419     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1420     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1421     for (i=0, j=0; i<n_B; i++) {
1422       if (!PetscBTLookup(bitmask,is_indices[i])) {
1423         aux_array2[j++] = i;
1424       }
1425     }
1426     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1427     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1428     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1429     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1430     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1431 
1432     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1433       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1434       for (i=0, j=0; i<n_R; i++) {
1435         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1436           aux_array1[j++] = i;
1437         }
1438       }
1439       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1440       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1441       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1442     }
1443     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1444     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1445   } else {
1446     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1447     IS               tis;
1448     PetscInt         schur_size;
1449 
1450     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1451     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1452     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1453     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1454     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1455       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1456       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1457       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1458     }
1459   }
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1466 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1467 {
1468   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1469   PC_IS          *pcis = (PC_IS*)pc->data;
1470   PC             pc_temp;
1471   Mat            A_RR;
1472   MatReuse       reuse;
1473   PetscScalar    m_one = -1.0;
1474   PetscReal      value;
1475   PetscInt       n_D,n_R,ibs,mbs;
1476   PetscBool      use_exact,use_exact_reduced,issbaij;
1477   PetscErrorCode ierr;
1478   /* prefixes stuff */
1479   char           dir_prefix[256],neu_prefix[256],str_level[16];
1480   size_t         len;
1481 
1482   PetscFunctionBegin;
1483 
1484   /* compute prefixes */
1485   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1486   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1487   if (!pcbddc->current_level) {
1488     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1489     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1490     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1491     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1492   } else {
1493     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1494     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1495     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1496     len -= 15; /* remove "pc_bddc_coarse_" */
1497     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1498     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1499     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1500     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1501     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1502     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1503     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1504     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1505   }
1506 
1507   /* DIRICHLET PROBLEM */
1508   if (dirichlet) {
1509     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1510     if (pcbddc->issym) {
1511       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1512     }
1513     /* Matrix for Dirichlet problem is pcis->A_II */
1514     n_D = pcis->n - pcis->n_B;
1515     if (!pcbddc->ksp_D) { /* create object if not yet build */
1516       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1517       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1518       /* default */
1519       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1520       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1521       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1522       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1523       if (issbaij) {
1524         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1525       } else {
1526         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1527       }
1528       /* Allow user's customization */
1529       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1530       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1531     }
1532     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1533     if (sub_schurs->reuse_mumps) {
1534       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1535 
1536       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1537     }
1538     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1539     if (!n_D) {
1540       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1541       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1542     }
1543     /* Set Up KSP for Dirichlet problem of BDDC */
1544     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1545     /* set ksp_D into pcis data */
1546     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1547     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1548     pcis->ksp_D = pcbddc->ksp_D;
1549   }
1550 
1551   /* NEUMANN PROBLEM */
1552   A_RR = 0;
1553   if (neumann) {
1554     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1555     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1556     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1557     if (pcbddc->ksp_R) { /* already created ksp */
1558       PetscInt nn_R;
1559       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1560       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1561       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1562       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1563         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1564         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1565         reuse = MAT_INITIAL_MATRIX;
1566       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1567         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1568           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1569           reuse = MAT_INITIAL_MATRIX;
1570         } else { /* safe to reuse the matrix */
1571           reuse = MAT_REUSE_MATRIX;
1572         }
1573       }
1574       /* last check */
1575       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1576         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1577         reuse = MAT_INITIAL_MATRIX;
1578       }
1579     } else { /* first time, so we need to create the matrix */
1580       reuse = MAT_INITIAL_MATRIX;
1581     }
1582     /* extract A_RR */
1583     if (!sub_schurs->reuse_mumps) {
1584       ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1585       ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1586       if (ibs != mbs) {
1587         Mat newmat;
1588         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1589         ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1590         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1591       } else {
1592         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1593       }
1594       if (pcbddc->issym) {
1595         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1596       }
1597     } else {
1598       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1599 
1600       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1601       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1602       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1603     }
1604     if (!pcbddc->ksp_R) { /* create object if not present */
1605       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1606       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1607       /* default */
1608       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1609       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1610       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1611       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1612       if (issbaij) {
1613         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1614       } else {
1615         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1616       }
1617       /* Allow user's customization */
1618       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1619       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1620     }
1621     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1622     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1623     if (!n_R) {
1624       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1625       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1626     }
1627     /* Reuse MUMPS solver if it is present */
1628     if (sub_schurs->reuse_mumps) {
1629       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1630 
1631       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1632     }
1633     /* Set Up KSP for Neumann problem of BDDC */
1634     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1635   }
1636   /* free Neumann problem's matrix */
1637   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1638 
1639   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1640   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1641     if (pcbddc->dbg_flag) {
1642       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1643       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1644       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1645     }
1646     if (dirichlet) { /* Dirichlet */
1647       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1648       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1649       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1650       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1651       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1652       /* need to be adapted? */
1653       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1654       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1655       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1656       /* print info */
1657       if (pcbddc->dbg_flag) {
1658         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1659         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1660       }
1661       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1662         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1663       }
1664     }
1665     if (neumann) { /* Neumann */
1666       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1667       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1668       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1669       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1670       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1671       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1672       /* need to be adapted? */
1673       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1674       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1675       /* print info */
1676       if (pcbddc->dbg_flag) {
1677         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1678         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1679       }
1680       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1681         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1682       }
1683     }
1684   }
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1690 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1691 {
1692   PetscErrorCode  ierr;
1693   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1694   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1695 
1696   PetscFunctionBegin;
1697   if (!sub_schurs->reuse_mumps) {
1698     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1699   }
1700   if (!pcbddc->switch_static) {
1701     if (applytranspose && pcbddc->local_auxmat1) {
1702       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1703       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1704     }
1705     if (!sub_schurs->reuse_mumps) {
1706       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1707       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1708     } else {
1709       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1710 
1711       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1712       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1713     }
1714   } else {
1715     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1716     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1717     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1718     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1719     if (applytranspose && pcbddc->local_auxmat1) {
1720       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1721       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1722       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1723       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1724     }
1725   }
1726   if (!sub_schurs->reuse_mumps) {
1727     if (applytranspose) {
1728       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1729     } else {
1730       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1731     }
1732 #if defined(PETSC_HAVE_MUMPS)
1733   } else {
1734     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1735 
1736     if (applytranspose) {
1737       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1738     } else {
1739       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1740     }
1741 #endif
1742   }
1743   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1744   if (!pcbddc->switch_static) {
1745     if (!sub_schurs->reuse_mumps) {
1746       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1747       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1748     } else {
1749       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1750 
1751       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1752       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1753     }
1754     if (!applytranspose && pcbddc->local_auxmat1) {
1755       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1756       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1757     }
1758   } else {
1759     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1760     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1761     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1762     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1763     if (!applytranspose && pcbddc->local_auxmat1) {
1764       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1765       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1766     }
1767     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1768     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1769     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1770     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1771   }
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1776 #undef __FUNCT__
1777 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1778 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1779 {
1780   PetscErrorCode ierr;
1781   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1782   PC_IS*            pcis = (PC_IS*)  (pc->data);
1783   const PetscScalar zero = 0.0;
1784 
1785   PetscFunctionBegin;
1786   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1787   if (applytranspose) {
1788     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1789     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1790   } else {
1791     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1792     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1793   }
1794   /* start communications from local primal nodes to rhs of coarse solver */
1795   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1796   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1797   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1798 
1799   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1800   /* TODO remove null space when doing multilevel */
1801   if (pcbddc->coarse_ksp) {
1802     Vec rhs,sol;
1803 
1804     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1805     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1806     if (applytranspose) {
1807       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1808     } else {
1809       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1810     }
1811   }
1812 
1813   /* Local solution on R nodes */
1814   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1815     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1816   }
1817 
1818   /* communications from coarse sol to local primal nodes */
1819   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1820   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1821 
1822   /* Sum contributions from two levels */
1823   if (applytranspose) {
1824     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1825     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1826   } else {
1827     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1828     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1829   }
1830   PetscFunctionReturn(0);
1831 }
1832 
1833 #undef __FUNCT__
1834 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1835 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1836 {
1837   PetscErrorCode ierr;
1838   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1839   PetscScalar    *array;
1840   Vec            from,to;
1841 
1842   PetscFunctionBegin;
1843   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1844     from = pcbddc->coarse_vec;
1845     to = pcbddc->vec1_P;
1846     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1847       Vec tvec;
1848 
1849       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1850       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1851       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1852       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1853       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1854       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1855     }
1856   } else { /* from local to global -> put data in coarse right hand side */
1857     from = pcbddc->vec1_P;
1858     to = pcbddc->coarse_vec;
1859   }
1860   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1861   PetscFunctionReturn(0);
1862 }
1863 
1864 #undef __FUNCT__
1865 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1866 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1867 {
1868   PetscErrorCode ierr;
1869   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1870   PetscScalar    *array;
1871   Vec            from,to;
1872 
1873   PetscFunctionBegin;
1874   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1875     from = pcbddc->coarse_vec;
1876     to = pcbddc->vec1_P;
1877   } else { /* from local to global -> put data in coarse right hand side */
1878     from = pcbddc->vec1_P;
1879     to = pcbddc->coarse_vec;
1880   }
1881   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1882   if (smode == SCATTER_FORWARD) {
1883     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1884       Vec tvec;
1885 
1886       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1887       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1888       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1889       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1890     }
1891   } else {
1892     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1893      ierr = VecResetArray(from);CHKERRQ(ierr);
1894     }
1895   }
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 /* uncomment for testing purposes */
1900 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1901 #undef __FUNCT__
1902 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1903 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1904 {
1905   PetscErrorCode    ierr;
1906   PC_IS*            pcis = (PC_IS*)(pc->data);
1907   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1908   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1909   /* one and zero */
1910   PetscScalar       one=1.0,zero=0.0;
1911   /* space to store constraints and their local indices */
1912   PetscScalar       *constraints_data;
1913   PetscInt          *constraints_idxs,*constraints_idxs_B;
1914   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1915   PetscInt          *constraints_n;
1916   /* iterators */
1917   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1918   /* BLAS integers */
1919   PetscBLASInt      lwork,lierr;
1920   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1921   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1922   /* reuse */
1923   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1924   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1925   /* change of basis */
1926   PetscBool         qr_needed;
1927   PetscBT           change_basis,qr_needed_idx;
1928   /* auxiliary stuff */
1929   PetscInt          *nnz,*is_indices;
1930   PetscInt          ncc;
1931   /* some quantities */
1932   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1933   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1934 
1935   PetscFunctionBegin;
1936   /* Destroy Mat objects computed previously */
1937   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1938   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1939   /* save info on constraints from previous setup (if any) */
1940   olocal_primal_size = pcbddc->local_primal_size;
1941   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1942   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
1943   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1944   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1945   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1946   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1947 
1948   /* print some info */
1949   if (pcbddc->dbg_flag) {
1950     IS       vertices;
1951     PetscInt nv,nedges,nfaces;
1952     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1953     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1954     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1955     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1956     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1957     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1958     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1959     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1960     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1961   }
1962 
1963   if (!pcbddc->adaptive_selection) {
1964     IS           ISForVertices,*ISForFaces,*ISForEdges;
1965     MatNullSpace nearnullsp;
1966     const Vec    *nearnullvecs;
1967     Vec          *localnearnullsp;
1968     PetscScalar  *array;
1969     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1970     PetscBool    nnsp_has_cnst;
1971     /* LAPACK working arrays for SVD or POD */
1972     PetscBool    skip_lapack,boolforchange;
1973     PetscScalar  *work;
1974     PetscReal    *singular_vals;
1975 #if defined(PETSC_USE_COMPLEX)
1976     PetscReal    *rwork;
1977 #endif
1978 #if defined(PETSC_MISSING_LAPACK_GESVD)
1979     PetscScalar  *temp_basis,*correlation_mat;
1980 #else
1981     PetscBLASInt dummy_int=1;
1982     PetscScalar  dummy_scalar=1.;
1983 #endif
1984 
1985     /* Get index sets for faces, edges and vertices from graph */
1986     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1987     /* free unneeded index sets */
1988     if (!pcbddc->use_vertices) {
1989       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1990     }
1991     if (!pcbddc->use_edges) {
1992       for (i=0;i<n_ISForEdges;i++) {
1993         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1994       }
1995       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1996       n_ISForEdges = 0;
1997     }
1998     if (!pcbddc->use_faces) {
1999       for (i=0;i<n_ISForFaces;i++) {
2000         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2001       }
2002       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2003       n_ISForFaces = 0;
2004     }
2005 
2006 #if defined(PETSC_USE_DEBUG)
2007     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2008        Also use_change_of_basis should be consistent among processors */
2009     if (pcbddc->NullSpace) {
2010       PetscBool tbool[2],gbool[2];
2011 
2012       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2013         pcbddc->use_change_of_basis = PETSC_TRUE;
2014         if (!ISForEdges) {
2015           pcbddc->use_change_on_faces = PETSC_TRUE;
2016         }
2017       }
2018       tbool[0] = pcbddc->use_change_of_basis;
2019       tbool[1] = pcbddc->use_change_on_faces;
2020       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2021       pcbddc->use_change_of_basis = gbool[0];
2022       pcbddc->use_change_on_faces = gbool[1];
2023     }
2024 #endif
2025 
2026     /* check if near null space is attached to global mat */
2027     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2028     if (nearnullsp) {
2029       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2030       /* remove any stored info */
2031       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2032       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2033       /* store information for BDDC solver reuse */
2034       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2035       pcbddc->onearnullspace = nearnullsp;
2036       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2037       for (i=0;i<nnsp_size;i++) {
2038         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2039       }
2040     } else { /* if near null space is not provided BDDC uses constants by default */
2041       nnsp_size = 0;
2042       nnsp_has_cnst = PETSC_TRUE;
2043     }
2044     /* get max number of constraints on a single cc */
2045     max_constraints = nnsp_size;
2046     if (nnsp_has_cnst) max_constraints++;
2047 
2048     /*
2049          Evaluate maximum storage size needed by the procedure
2050          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2051          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2052          There can be multiple constraints per connected component
2053                                                                                                                                                            */
2054     n_vertices = 0;
2055     if (ISForVertices) {
2056       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2057     }
2058     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2059     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2060 
2061     total_counts = n_ISForFaces+n_ISForEdges;
2062     total_counts *= max_constraints;
2063     total_counts += n_vertices;
2064     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2065 
2066     total_counts = 0;
2067     max_size_of_constraint = 0;
2068     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2069       IS used_is;
2070       if (i<n_ISForEdges) {
2071         used_is = ISForEdges[i];
2072       } else {
2073         used_is = ISForFaces[i-n_ISForEdges];
2074       }
2075       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2076       total_counts += j;
2077       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2078     }
2079     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2080 
2081     /* get local part of global near null space vectors */
2082     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2083     for (k=0;k<nnsp_size;k++) {
2084       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2085       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2086       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2087     }
2088 
2089     /* whether or not to skip lapack calls */
2090     skip_lapack = PETSC_TRUE;
2091     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2092 
2093     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2094     if (!skip_lapack) {
2095       PetscScalar temp_work;
2096 
2097 #if defined(PETSC_MISSING_LAPACK_GESVD)
2098       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2099       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2100       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2101       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2102 #if defined(PETSC_USE_COMPLEX)
2103       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2104 #endif
2105       /* now we evaluate the optimal workspace using query with lwork=-1 */
2106       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2107       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2108       lwork = -1;
2109       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2110 #if !defined(PETSC_USE_COMPLEX)
2111       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2112 #else
2113       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2114 #endif
2115       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2116       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2117 #else /* on missing GESVD */
2118       /* SVD */
2119       PetscInt max_n,min_n;
2120       max_n = max_size_of_constraint;
2121       min_n = max_constraints;
2122       if (max_size_of_constraint < max_constraints) {
2123         min_n = max_size_of_constraint;
2124         max_n = max_constraints;
2125       }
2126       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2127 #if defined(PETSC_USE_COMPLEX)
2128       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2129 #endif
2130       /* now we evaluate the optimal workspace using query with lwork=-1 */
2131       lwork = -1;
2132       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2133       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2134       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2135       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2136 #if !defined(PETSC_USE_COMPLEX)
2137       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2138 #else
2139       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2140 #endif
2141       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2142       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2143 #endif /* on missing GESVD */
2144       /* Allocate optimal workspace */
2145       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2146       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2147     }
2148     /* Now we can loop on constraining sets */
2149     total_counts = 0;
2150     constraints_idxs_ptr[0] = 0;
2151     constraints_data_ptr[0] = 0;
2152     /* vertices */
2153     if (n_vertices) {
2154       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2155       if (nnsp_has_cnst) { /* it considers all possible vertices */
2156         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2157         for (i=0;i<n_vertices;i++) {
2158           constraints_n[total_counts] = 1;
2159           constraints_data[total_counts] = 1.0;
2160           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2161           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2162           total_counts++;
2163         }
2164       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2165         PetscBool used_vertex;
2166         for (i=0;i<n_vertices;i++) {
2167           used_vertex = PETSC_FALSE;
2168           k = 0;
2169           while (!used_vertex && k<nnsp_size) {
2170             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2171             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2172               constraints_n[total_counts] = 1;
2173               constraints_idxs[total_counts] = is_indices[i];
2174               constraints_data[total_counts] = 1.0;
2175               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2176               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2177               total_counts++;
2178               used_vertex = PETSC_TRUE;
2179             }
2180             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2181             k++;
2182           }
2183         }
2184       }
2185       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2186       n_vertices = total_counts;
2187     }
2188 
2189     /* edges and faces */
2190     total_counts_cc = total_counts;
2191     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2192       IS        used_is;
2193       PetscBool idxs_copied = PETSC_FALSE;
2194 
2195       if (ncc<n_ISForEdges) {
2196         used_is = ISForEdges[ncc];
2197         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2198       } else {
2199         used_is = ISForFaces[ncc-n_ISForEdges];
2200         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2201       }
2202       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2203 
2204       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2205       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2206       /* change of basis should not be performed on local periodic nodes */
2207       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2208       if (nnsp_has_cnst) {
2209         PetscScalar quad_value;
2210 
2211         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2212         idxs_copied = PETSC_TRUE;
2213 
2214         if (!pcbddc->use_nnsp_true) {
2215           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2216         } else {
2217           quad_value = 1.0;
2218         }
2219         for (j=0;j<size_of_constraint;j++) {
2220           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2221         }
2222         temp_constraints++;
2223         total_counts++;
2224       }
2225       for (k=0;k<nnsp_size;k++) {
2226         PetscReal real_value;
2227         PetscScalar *ptr_to_data;
2228 
2229         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2230         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2231         for (j=0;j<size_of_constraint;j++) {
2232           ptr_to_data[j] = array[is_indices[j]];
2233         }
2234         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2235         /* check if array is null on the connected component */
2236         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2237         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2238         if (real_value > 0.0) { /* keep indices and values */
2239           temp_constraints++;
2240           total_counts++;
2241           if (!idxs_copied) {
2242             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2243             idxs_copied = PETSC_TRUE;
2244           }
2245         }
2246       }
2247       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2248       valid_constraints = temp_constraints;
2249       if (!pcbddc->use_nnsp_true && temp_constraints) {
2250         if (temp_constraints == 1) { /* just normalize the constraint */
2251           PetscScalar norm,*ptr_to_data;
2252 
2253           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2254           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2255           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2256           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2257           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2258         } else { /* perform SVD */
2259           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2260           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2261 
2262 #if defined(PETSC_MISSING_LAPACK_GESVD)
2263           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2264              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2265              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2266                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2267                 from that computed using LAPACKgesvd
2268              -> This is due to a different computation of eigenvectors in LAPACKheev
2269              -> The quality of the POD-computed basis will be the same */
2270           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2271           /* Store upper triangular part of correlation matrix */
2272           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2273           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2274           for (j=0;j<temp_constraints;j++) {
2275             for (k=0;k<j+1;k++) {
2276               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2277             }
2278           }
2279           /* compute eigenvalues and eigenvectors of correlation matrix */
2280           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2281           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2282 #if !defined(PETSC_USE_COMPLEX)
2283           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2284 #else
2285           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2286 #endif
2287           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2288           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2289           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2290           j = 0;
2291           while (j < temp_constraints && singular_vals[j] < tol) j++;
2292           total_counts = total_counts-j;
2293           valid_constraints = temp_constraints-j;
2294           /* scale and copy POD basis into used quadrature memory */
2295           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2296           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2297           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2298           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2299           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2300           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2301           if (j<temp_constraints) {
2302             PetscInt ii;
2303             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2304             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2305             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2306             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2307             for (k=0;k<temp_constraints-j;k++) {
2308               for (ii=0;ii<size_of_constraint;ii++) {
2309                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2310               }
2311             }
2312           }
2313 #else  /* on missing GESVD */
2314           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2315           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2316           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2317           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2318 #if !defined(PETSC_USE_COMPLEX)
2319           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2320 #else
2321           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2322 #endif
2323           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2324           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2325           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2326           k = temp_constraints;
2327           if (k > size_of_constraint) k = size_of_constraint;
2328           j = 0;
2329           while (j < k && singular_vals[k-j-1] < tol) j++;
2330           valid_constraints = k-j;
2331           total_counts = total_counts-temp_constraints+valid_constraints;
2332 #endif /* on missing GESVD */
2333         }
2334       }
2335       /* update pointers information */
2336       if (valid_constraints) {
2337         constraints_n[total_counts_cc] = valid_constraints;
2338         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2339         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2340         /* set change_of_basis flag */
2341         if (boolforchange) {
2342           PetscBTSet(change_basis,total_counts_cc);
2343         }
2344         total_counts_cc++;
2345       }
2346     }
2347     /* free workspace */
2348     if (!skip_lapack) {
2349       ierr = PetscFree(work);CHKERRQ(ierr);
2350 #if defined(PETSC_USE_COMPLEX)
2351       ierr = PetscFree(rwork);CHKERRQ(ierr);
2352 #endif
2353       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2354 #if defined(PETSC_MISSING_LAPACK_GESVD)
2355       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2356       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2357 #endif
2358     }
2359     for (k=0;k<nnsp_size;k++) {
2360       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2361     }
2362     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2363     /* free index sets of faces, edges and vertices */
2364     for (i=0;i<n_ISForFaces;i++) {
2365       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2366     }
2367     if (n_ISForFaces) {
2368       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2369     }
2370     for (i=0;i<n_ISForEdges;i++) {
2371       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2372     }
2373     if (n_ISForEdges) {
2374       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2375     }
2376     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2377   } else {
2378     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2379 
2380     total_counts = 0;
2381     n_vertices = 0;
2382     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2383       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2384     }
2385     max_constraints = 0;
2386     total_counts_cc = 0;
2387     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2388       total_counts += pcbddc->adaptive_constraints_n[i];
2389       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2390       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2391     }
2392     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2393     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2394     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2395     constraints_data = pcbddc->adaptive_constraints_data;
2396     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2397     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2398     total_counts_cc = 0;
2399     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2400       if (pcbddc->adaptive_constraints_n[i]) {
2401         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2402       }
2403     }
2404 #if 0
2405     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2406     for (i=0;i<total_counts_cc;i++) {
2407       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2408       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2409       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2410         printf(" %d",constraints_idxs[j]);
2411       }
2412       printf("\n");
2413       printf("number of cc: %d\n",constraints_n[i]);
2414     }
2415     for (i=0;i<n_vertices;i++) {
2416       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2417     }
2418     for (i=0;i<sub_schurs->n_subs;i++) {
2419       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2420     }
2421 #endif
2422 
2423     max_size_of_constraint = 0;
2424     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2425     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2426     /* Change of basis */
2427     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2428     if (pcbddc->use_change_of_basis) {
2429       for (i=0;i<sub_schurs->n_subs;i++) {
2430         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2431           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2432         }
2433       }
2434     }
2435   }
2436   pcbddc->local_primal_size = total_counts;
2437   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2438 
2439   /* map constraints_idxs in boundary numbering */
2440   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2441   if (i != constraints_idxs_ptr[total_counts_cc]) {
2442     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2443   }
2444 
2445   /* Create constraint matrix */
2446   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2447   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2448   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2449 
2450   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2451   /* determine if a QR strategy is needed for change of basis */
2452   qr_needed = PETSC_FALSE;
2453   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2454   total_primal_vertices=0;
2455   pcbddc->local_primal_size_cc = 0;
2456   for (i=0;i<total_counts_cc;i++) {
2457     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2458     if (size_of_constraint == 1) {
2459       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2460       pcbddc->local_primal_size_cc += 1;
2461     } else if (PetscBTLookup(change_basis,i)) {
2462       for (k=0;k<constraints_n[i];k++) {
2463         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2464       }
2465       pcbddc->local_primal_size_cc += constraints_n[i];
2466       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2467         PetscBTSet(qr_needed_idx,i);
2468         qr_needed = PETSC_TRUE;
2469       }
2470     } else {
2471       pcbddc->local_primal_size_cc += 1;
2472     }
2473   }
2474   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2475   pcbddc->n_vertices = total_primal_vertices;
2476   /* permute indices in order to have a sorted set of vertices */
2477   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2478 
2479   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2480   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2481   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2482 
2483   /* nonzero structure of constraint matrix */
2484   /* and get reference dof for local constraints */
2485   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2486   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2487 
2488   j = total_primal_vertices;
2489   total_counts = total_primal_vertices;
2490   cum = total_primal_vertices;
2491   for (i=n_vertices;i<total_counts_cc;i++) {
2492     if (!PetscBTLookup(change_basis,i)) {
2493       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2494       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2495       cum++;
2496       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2497       for (k=0;k<constraints_n[i];k++) {
2498         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2499         nnz[j+k] = size_of_constraint;
2500       }
2501       j += constraints_n[i];
2502     }
2503   }
2504   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2505   ierr = PetscFree(nnz);CHKERRQ(ierr);
2506 
2507   /* set values in constraint matrix */
2508   for (i=0;i<total_primal_vertices;i++) {
2509     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2510   }
2511   total_counts = total_primal_vertices;
2512   for (i=n_vertices;i<total_counts_cc;i++) {
2513     if (!PetscBTLookup(change_basis,i)) {
2514       PetscInt *cols;
2515 
2516       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2517       cols = constraints_idxs+constraints_idxs_ptr[i];
2518       for (k=0;k<constraints_n[i];k++) {
2519         PetscInt    row = total_counts+k;
2520         PetscScalar *vals;
2521 
2522         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2523         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2524       }
2525       total_counts += constraints_n[i];
2526     }
2527   }
2528   /* assembling */
2529   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2530   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2531 
2532   /*
2533   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2534   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2535   */
2536   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2537   if (pcbddc->use_change_of_basis) {
2538     /* dual and primal dofs on a single cc */
2539     PetscInt     dual_dofs,primal_dofs;
2540     /* working stuff for GEQRF */
2541     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2542     PetscBLASInt lqr_work;
2543     /* working stuff for UNGQR */
2544     PetscScalar  *gqr_work,lgqr_work_t;
2545     PetscBLASInt lgqr_work;
2546     /* working stuff for TRTRS */
2547     PetscScalar  *trs_rhs;
2548     PetscBLASInt Blas_NRHS;
2549     /* pointers for values insertion into change of basis matrix */
2550     PetscInt     *start_rows,*start_cols;
2551     PetscScalar  *start_vals;
2552     /* working stuff for values insertion */
2553     PetscBT      is_primal;
2554     PetscInt     *aux_primal_numbering_B;
2555     /* matrix sizes */
2556     PetscInt     global_size,local_size;
2557     /* temporary change of basis */
2558     Mat          localChangeOfBasisMatrix;
2559     /* extra space for debugging */
2560     PetscScalar  *dbg_work;
2561 
2562     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2563     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2564     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2565     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2566     /* nonzeros for local mat */
2567     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2568     for (i=0;i<pcis->n;i++) nnz[i]=1;
2569     for (i=n_vertices;i<total_counts_cc;i++) {
2570       if (PetscBTLookup(change_basis,i)) {
2571         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2572         if (PetscBTLookup(qr_needed_idx,i)) {
2573           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2574         } else {
2575           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2576           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2577         }
2578       }
2579     }
2580     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2581     ierr = PetscFree(nnz);CHKERRQ(ierr);
2582     /* Set initial identity in the matrix */
2583     for (i=0;i<pcis->n;i++) {
2584       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2585     }
2586 
2587     if (pcbddc->dbg_flag) {
2588       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2589       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2590     }
2591 
2592 
2593     /* Now we loop on the constraints which need a change of basis */
2594     /*
2595        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2596        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2597 
2598        Basic blocks of change of basis matrix T computed by
2599 
2600           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2601 
2602             | 1        0   ...        0         s_1/S |
2603             | 0        1   ...        0         s_2/S |
2604             |              ...                        |
2605             | 0        ...            1     s_{n-1}/S |
2606             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2607 
2608             with S = \sum_{i=1}^n s_i^2
2609             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2610                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2611 
2612           - QR decomposition of constraints otherwise
2613     */
2614     if (qr_needed) {
2615       /* space to store Q */
2616       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2617       /* first we issue queries for optimal work */
2618       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2619       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2620       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2621       lqr_work = -1;
2622       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2623       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2624       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2625       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2626       lgqr_work = -1;
2627       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2628       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2629       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2630       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2631       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2632       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2633       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2634       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2635       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2636       /* array to store scaling factors for reflectors */
2637       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2638       /* array to store rhs and solution of triangular solver */
2639       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2640       /* allocating workspace for check */
2641       if (pcbddc->dbg_flag) {
2642         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2643       }
2644     }
2645     /* array to store whether a node is primal or not */
2646     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2647     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2648     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2649     if (i != total_primal_vertices) {
2650       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2651     }
2652     for (i=0;i<total_primal_vertices;i++) {
2653       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2654     }
2655     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2656 
2657     /* loop on constraints and see whether or not they need a change of basis and compute it */
2658     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2659       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2660       if (PetscBTLookup(change_basis,total_counts)) {
2661         /* get constraint info */
2662         primal_dofs = constraints_n[total_counts];
2663         dual_dofs = size_of_constraint-primal_dofs;
2664 
2665         if (pcbddc->dbg_flag) {
2666           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2667         }
2668 
2669         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2670 
2671           /* copy quadrature constraints for change of basis check */
2672           if (pcbddc->dbg_flag) {
2673             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2674           }
2675           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2676           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2677 
2678           /* compute QR decomposition of constraints */
2679           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2680           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2681           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2682           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2683           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2684           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2685           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2686 
2687           /* explictly compute R^-T */
2688           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2689           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2690           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2691           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2692           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2693           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2694           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2695           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2696           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2697           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2698 
2699           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2700           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2701           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2702           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2703           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2704           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2705           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2706           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2707           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2708 
2709           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2710              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2711              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2712           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2713           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2714           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2715           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2716           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2717           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2718           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2719           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2720           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2721           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2722 
2723           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2724           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2725           /* insert cols for primal dofs */
2726           for (j=0;j<primal_dofs;j++) {
2727             start_vals = &qr_basis[j*size_of_constraint];
2728             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2729             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2730           }
2731           /* insert cols for dual dofs */
2732           for (j=0,k=0;j<dual_dofs;k++) {
2733             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2734               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2735               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2736               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2737               j++;
2738             }
2739           }
2740 
2741           /* check change of basis */
2742           if (pcbddc->dbg_flag) {
2743             PetscInt   ii,jj;
2744             PetscBool valid_qr=PETSC_TRUE;
2745             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2746             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2747             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2748             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2749             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2750             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2751             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2752             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2753             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2754             for (jj=0;jj<size_of_constraint;jj++) {
2755               for (ii=0;ii<primal_dofs;ii++) {
2756                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2757                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2758               }
2759             }
2760             if (!valid_qr) {
2761               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2762               for (jj=0;jj<size_of_constraint;jj++) {
2763                 for (ii=0;ii<primal_dofs;ii++) {
2764                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2765                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2766                   }
2767                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2768                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2769                   }
2770                 }
2771               }
2772             } else {
2773               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2774             }
2775           }
2776         } else { /* simple transformation block */
2777           PetscInt    row,col;
2778           PetscScalar val,norm;
2779 
2780           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2781           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2782           for (j=0;j<size_of_constraint;j++) {
2783             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2784             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2785             if (!PetscBTLookup(is_primal,row_B)) {
2786               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2787               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2788               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2789             } else {
2790               for (k=0;k<size_of_constraint;k++) {
2791                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2792                 if (row != col) {
2793                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2794                 } else {
2795                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2796                 }
2797                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2798               }
2799             }
2800           }
2801           if (pcbddc->dbg_flag) {
2802             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2803           }
2804         }
2805       } else {
2806         if (pcbddc->dbg_flag) {
2807           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2808         }
2809       }
2810     }
2811 
2812     /* free workspace */
2813     if (qr_needed) {
2814       if (pcbddc->dbg_flag) {
2815         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2816       }
2817       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2818       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2819       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2820       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2821       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2822     }
2823     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2824     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2825     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2826 
2827     /* assembling of global change of variable */
2828     {
2829       Mat      tmat;
2830       PetscInt bs;
2831 
2832       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2833       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2834       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2835       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2836       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2837       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2838       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2839       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2840       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2841       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2842       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2843       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2844       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2845       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2846       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2847       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2848       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2849       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2850     }
2851     /* check */
2852     if (pcbddc->dbg_flag) {
2853       PetscReal error;
2854       Vec       x,x_change;
2855 
2856       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2857       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2858       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2859       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2860       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2861       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2862       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2863       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2864       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2865       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2866       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2867       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2868       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2869       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2870       ierr = VecDestroy(&x);CHKERRQ(ierr);
2871       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2872     }
2873 
2874     /* adapt sub_schurs computed (if any) */
2875     if (pcbddc->use_deluxe_scaling) {
2876       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2877       if (sub_schurs->S_Ej_all) {
2878         Mat S_new,tmat;
2879         IS is_all_N;
2880 
2881         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2882         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2883         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2884         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2885         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2886         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2887         sub_schurs->S_Ej_all = S_new;
2888         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2889         if (sub_schurs->sum_S_Ej_all) {
2890           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2891           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2892           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2893           sub_schurs->sum_S_Ej_all = S_new;
2894           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2895         }
2896         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2897       }
2898     }
2899     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2900   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2901     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2902     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2903   }
2904 
2905   /* set up change of basis context */
2906   if (pcbddc->ChangeOfBasisMatrix) {
2907     PCBDDCChange_ctx change_ctx;
2908 
2909     if (!pcbddc->new_global_mat) {
2910       PetscInt global_size,local_size;
2911 
2912       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2913       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2914       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2915       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2916       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2917       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2918       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2919       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2920       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2921     } else {
2922       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2923       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2924       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2925     }
2926     if (!pcbddc->user_ChangeOfBasisMatrix) {
2927       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2928       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2929     } else {
2930       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2931       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2932     }
2933     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2934     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2935     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2936     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2937   }
2938 
2939   /* check if a new primal space has been introduced */
2940   pcbddc->new_primal_space_local = PETSC_TRUE;
2941   if (olocal_primal_size == pcbddc->local_primal_size) {
2942     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2943     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2944     if (!pcbddc->new_primal_space_local) {
2945       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2946       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2947     }
2948   }
2949   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
2950   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2951   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2952 
2953   /* flush dbg viewer */
2954   if (pcbddc->dbg_flag) {
2955     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2956   }
2957 
2958   /* free workspace */
2959   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2960   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2961   if (!pcbddc->adaptive_selection) {
2962     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2963     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2964   } else {
2965     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2966                       pcbddc->adaptive_constraints_idxs_ptr,
2967                       pcbddc->adaptive_constraints_data_ptr,
2968                       pcbddc->adaptive_constraints_idxs,
2969                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2970     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
2971     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
2972   }
2973   PetscFunctionReturn(0);
2974 }
2975 
2976 #undef __FUNCT__
2977 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2978 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2979 {
2980   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2981   PC_IS       *pcis = (PC_IS*)pc->data;
2982   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2983   PetscInt    ierr,i,vertex_size,N;
2984   PetscViewer viewer=pcbddc->dbg_viewer;
2985 
2986   PetscFunctionBegin;
2987   /* Reset previously computed graph */
2988   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2989   /* Init local Graph struct */
2990   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2991   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
2992 
2993   /* Check validity of the csr graph passed in by the user */
2994   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2995     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2996   }
2997 
2998   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2999   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3000     PetscInt  *xadj,*adjncy;
3001     PetscInt  nvtxs;
3002     PetscBool flg_row=PETSC_FALSE;
3003 
3004     if (pcbddc->use_local_adj) {
3005 
3006       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3007       if (flg_row) {
3008         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3009         pcbddc->computed_rowadj = PETSC_TRUE;
3010       }
3011       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3012     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3013       IS                     is_dummy;
3014       ISLocalToGlobalMapping l2gmap_dummy;
3015       PetscInt               j,sum;
3016       PetscInt               *cxadj,*cadjncy;
3017       const PetscInt         *idxs;
3018       PCBDDCGraph            graph;
3019       PetscBT                is_on_boundary;
3020 
3021       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3022       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3023       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3024       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3025       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3026       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3027       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3028       if (flg_row) {
3029         graph->xadj = xadj;
3030         graph->adjncy = adjncy;
3031       }
3032       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3033       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3034       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3035 
3036       if (pcbddc->dbg_flag) {
3037         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3038         for (i=0;i<graph->ncc;i++) {
3039           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3040         }
3041       }
3042 
3043       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3044       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3045       for (i=0;i<pcis->n_B;i++) {
3046         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3047       }
3048       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3049 
3050       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3051       sum = 0;
3052       for (i=0;i<graph->ncc;i++) {
3053         PetscInt sizecc = 0;
3054         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3055           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3056             sizecc++;
3057           }
3058         }
3059         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3060           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3061             cxadj[graph->queue[j]] = sizecc;
3062           }
3063         }
3064         sum += sizecc*sizecc;
3065       }
3066       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3067       sum = 0;
3068       for (i=0;i<pcis->n;i++) {
3069         PetscInt temp = cxadj[i];
3070         cxadj[i] = sum;
3071         sum += temp;
3072       }
3073       cxadj[pcis->n] = sum;
3074       for (i=0;i<graph->ncc;i++) {
3075         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3076           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3077             PetscInt k,sizecc = 0;
3078             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3079               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3080                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3081                 sizecc++;
3082               }
3083             }
3084           }
3085         }
3086       }
3087       if (sum) {
3088         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3089       } else {
3090         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3091         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3092       }
3093       graph->xadj = 0;
3094       graph->adjncy = 0;
3095       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3096       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3097     }
3098   }
3099   if (pcbddc->dbg_flag) {
3100     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3101   }
3102 
3103   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3104   vertex_size = 1;
3105   if (pcbddc->user_provided_isfordofs) {
3106     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3107       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3108       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3109         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3110         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3111       }
3112       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3113       pcbddc->n_ISForDofs = 0;
3114       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3115     }
3116     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3117     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3118   } else {
3119     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3120       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3121       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3122       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3123         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3124       }
3125     }
3126   }
3127 
3128   /* Setup of Graph */
3129   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3130     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3131   }
3132   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3133     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3134   }
3135   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3136 
3137   /* Graph's connected components analysis */
3138   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3139 
3140   /* print some info to stdout */
3141   if (pcbddc->dbg_flag) {
3142     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3143   }
3144 
3145   /* mark topography has done */
3146   pcbddc->recompute_topography = PETSC_FALSE;
3147   PetscFunctionReturn(0);
3148 }
3149 
3150 #undef __FUNCT__
3151 #define __FUNCT__ "PCBDDCSubsetNumbering"
3152 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3153 {
3154   Vec            local_vec,global_vec;
3155   IS             seqis,paris;
3156   VecScatter     scatter_ctx;
3157   PetscScalar    *array;
3158   PetscInt       *temp_global_dofs;
3159   PetscScalar    globalsum;
3160   PetscInt       i,j,s;
3161   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3162   PetscMPIInt    rank_prec_comm,size_prec_comm;
3163   PetscInt       *dof_sizes,*dof_displs;
3164   PetscBool      first_found;
3165   PetscErrorCode ierr;
3166 
3167   PetscFunctionBegin;
3168   /* mpi buffers */
3169   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3170   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3171   j = ( !rank_prec_comm ? size_prec_comm : 0);
3172   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3173   /* get maximum size of subset */
3174   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3175   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3176   max_local = 0;
3177   for (i=0;i<n_local_dofs;i++) {
3178     if (max_local < temp_global_dofs[i] ) {
3179       max_local = temp_global_dofs[i];
3180     }
3181   }
3182   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3183   max_global++;
3184   max_local = 0;
3185   for (i=0;i<n_local_dofs;i++) {
3186     if (max_local < local_dofs[i] ) {
3187       max_local = local_dofs[i];
3188     }
3189   }
3190   max_local++;
3191   /* allocate workspace */
3192   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3193   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3194   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3195   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3196   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3197   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3198   /* create scatter */
3199   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3200   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3201   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3202   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3203   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3204   /* init array */
3205   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3206   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3207   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3208   if (local_dofs_mult) {
3209     for (i=0;i<n_local_dofs;i++) {
3210       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3211     }
3212   } else {
3213     for (i=0;i<n_local_dofs;i++) {
3214       array[local_dofs[i]]=1.0;
3215     }
3216   }
3217   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3218   /* scatter into global vec and get total number of global dofs */
3219   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3220   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3221   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3222   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3223   /* Fill global_vec with cumulative function for global numbering */
3224   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3225   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3226   nlocals = 0;
3227   first_index = -1;
3228   first_found = PETSC_FALSE;
3229   for (i=0;i<s;i++) {
3230     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3231       first_found = PETSC_TRUE;
3232       first_index = i;
3233     }
3234     nlocals += (PetscInt)PetscRealPart(array[i]);
3235   }
3236   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3237   if (!rank_prec_comm) {
3238     dof_displs[0]=0;
3239     for (i=1;i<size_prec_comm;i++) {
3240       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3241     }
3242   }
3243   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3244   if (first_found) {
3245     array[first_index] += (PetscScalar)nlocals;
3246     old_index = first_index;
3247     for (i=first_index+1;i<s;i++) {
3248       if (PetscRealPart(array[i]) > 0.1) {
3249         array[i] += array[old_index];
3250         old_index = i;
3251       }
3252     }
3253   }
3254   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3255   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3256   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3257   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3258   /* get global ordering of local dofs */
3259   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3260   if (local_dofs_mult) {
3261     for (i=0;i<n_local_dofs;i++) {
3262       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3263     }
3264   } else {
3265     for (i=0;i<n_local_dofs;i++) {
3266       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3267     }
3268   }
3269   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3270   /* free workspace */
3271   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3272   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3273   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3274   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3275   /* return pointer to global ordering of local dofs */
3276   *global_numbering_subset = temp_global_dofs;
3277   PetscFunctionReturn(0);
3278 }
3279 
3280 #undef __FUNCT__
3281 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3282 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3283 {
3284   PetscInt       i,j;
3285   PetscScalar    *alphas;
3286   PetscErrorCode ierr;
3287 
3288   PetscFunctionBegin;
3289   /* this implements stabilized Gram-Schmidt */
3290   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3291   for (i=0;i<n;i++) {
3292     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3293     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3294     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3295   }
3296   ierr = PetscFree(alphas);CHKERRQ(ierr);
3297   PetscFunctionReturn(0);
3298 }
3299 
3300 #undef __FUNCT__
3301 #define __FUNCT__ "MatISGetSubassemblingPattern"
3302 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3303 {
3304   Mat             subdomain_adj;
3305   IS              new_ranks,ranks_send_to;
3306   MatPartitioning partitioner;
3307   Mat_IS          *matis;
3308   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3309   PetscInt        prank;
3310   PetscMPIInt     size,rank,color;
3311   PetscInt        *xadj,*adjncy,*oldranks;
3312   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3313   PetscInt        i,local_size,threshold=0;
3314   PetscErrorCode  ierr;
3315   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3316   PetscSubcomm    subcomm;
3317 
3318   PetscFunctionBegin;
3319   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3320   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3321   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3322 
3323   /* Get info on mapping */
3324   matis = (Mat_IS*)(mat->data);
3325   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3326   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3327 
3328   /* build local CSR graph of subdomains' connectivity */
3329   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3330   xadj[0] = 0;
3331   xadj[1] = PetscMax(n_neighs-1,0);
3332   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3333   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3334 
3335   if (threshold) {
3336     PetscInt xadj_count = 0;
3337     for (i=1;i<n_neighs;i++) {
3338       if (n_shared[i] > threshold) {
3339         adjncy[xadj_count] = neighs[i];
3340         adjncy_wgt[xadj_count] = n_shared[i];
3341         xadj_count++;
3342       }
3343     }
3344     xadj[1] = xadj_count;
3345   } else {
3346     if (xadj[1]) {
3347       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3348       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3349     }
3350   }
3351   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3352   if (use_square) {
3353     for (i=0;i<xadj[1];i++) {
3354       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3355     }
3356   }
3357   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3358 
3359   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3360 
3361   /*
3362     Restrict work on active processes only.
3363   */
3364   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3365   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3366   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3367   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3368   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3369   if (color) {
3370     ierr = PetscFree(xadj);CHKERRQ(ierr);
3371     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3372     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3373   } else {
3374     PetscInt coarsening_ratio;
3375     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3376     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3377     prank = rank;
3378     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3379     /*
3380     for (i=0;i<size;i++) {
3381       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3382     }
3383     */
3384     for (i=0;i<xadj[1];i++) {
3385       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3386     }
3387     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3388     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3389     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3390 
3391     /* Partition */
3392     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3393     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3394     if (use_vwgt) {
3395       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3396       v_wgt[0] = local_size;
3397       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3398     }
3399     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3400     coarsening_ratio = size/n_subdomains;
3401     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3402     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3403     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3404     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3405 
3406     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3407     if (contiguous) {
3408       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3409     } else {
3410       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3411     }
3412     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3413     /* clean up */
3414     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3415     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3416     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3417     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3418   }
3419   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3420 
3421   /* assemble parallel IS for sends */
3422   i = 1;
3423   if (color) i=0;
3424   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3425 
3426   /* get back IS */
3427   *is_sends = ranks_send_to;
3428   PetscFunctionReturn(0);
3429 }
3430 
3431 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3432 
3433 #undef __FUNCT__
3434 #define __FUNCT__ "MatISSubassemble"
3435 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3436 {
3437   Mat                    local_mat;
3438   Mat_IS                 *matis;
3439   IS                     is_sends_internal;
3440   PetscInt               rows,cols,new_local_rows;
3441   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3442   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3443   ISLocalToGlobalMapping l2gmap;
3444   PetscInt*              l2gmap_indices;
3445   const PetscInt*        is_indices;
3446   MatType                new_local_type;
3447   /* buffers */
3448   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3449   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3450   PetscInt               *recv_buffer_idxs_local;
3451   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3452   /* MPI */
3453   MPI_Comm               comm,comm_n;
3454   PetscSubcomm           subcomm;
3455   PetscMPIInt            n_sends,n_recvs,commsize;
3456   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3457   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3458   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3459   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3460   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3461   PetscErrorCode         ierr;
3462 
3463   PetscFunctionBegin;
3464   /* TODO: add missing checks */
3465   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3466   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3467   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3468   PetscValidLogicalCollectiveInt(mat,nis,7);
3469   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3470   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3471   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3472   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3473   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3474   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3475   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3476   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3477     PetscInt mrows,mcols,mnrows,mncols;
3478     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3479     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3480     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3481     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3482     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3483     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3484   }
3485   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3486   PetscValidLogicalCollectiveInt(mat,bs,0);
3487   /* prepare IS for sending if not provided */
3488   if (!is_sends) {
3489     PetscBool pcontig = PETSC_TRUE;
3490     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3491     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3492   } else {
3493     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3494     is_sends_internal = is_sends;
3495   }
3496 
3497   /* get pointer of MATIS data */
3498   matis = (Mat_IS*)mat->data;
3499 
3500   /* get comm */
3501   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3502 
3503   /* compute number of sends */
3504   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3505   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3506 
3507   /* compute number of receives */
3508   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3509   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3510   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3511   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3512   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3513   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3514   ierr = PetscFree(iflags);CHKERRQ(ierr);
3515 
3516   /* restrict comm if requested */
3517   subcomm = 0;
3518   destroy_mat = PETSC_FALSE;
3519   if (restrict_comm) {
3520     PetscMPIInt color,subcommsize;
3521 
3522     color = 0;
3523     if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */
3524     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3525     subcommsize = commsize - subcommsize;
3526     /* check if reuse has been requested */
3527     if (reuse == MAT_REUSE_MATRIX) {
3528       if (*mat_n) {
3529         PetscMPIInt subcommsize2;
3530         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3531         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3532         comm_n = PetscObjectComm((PetscObject)*mat_n);
3533       } else {
3534         comm_n = PETSC_COMM_SELF;
3535       }
3536     } else { /* MAT_INITIAL_MATRIX */
3537       PetscMPIInt rank;
3538 
3539       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3540       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3541       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3542       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3543       comm_n = PetscSubcommChild(subcomm);
3544     }
3545     /* flag to destroy *mat_n if not significative */
3546     if (color) destroy_mat = PETSC_TRUE;
3547   } else {
3548     comm_n = comm;
3549   }
3550 
3551   /* prepare send/receive buffers */
3552   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3553   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3554   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3555   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3556   if (nis) {
3557     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3558   }
3559 
3560   /* Get data from local matrices */
3561   if (!isdense) {
3562     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3563     /* TODO: See below some guidelines on how to prepare the local buffers */
3564     /*
3565        send_buffer_vals should contain the raw values of the local matrix
3566        send_buffer_idxs should contain:
3567        - MatType_PRIVATE type
3568        - PetscInt        size_of_l2gmap
3569        - PetscInt        global_row_indices[size_of_l2gmap]
3570        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3571     */
3572   } else {
3573     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3574     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3575     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3576     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3577     send_buffer_idxs[1] = i;
3578     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3579     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3580     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3581     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3582     for (i=0;i<n_sends;i++) {
3583       ilengths_vals[is_indices[i]] = len*len;
3584       ilengths_idxs[is_indices[i]] = len+2;
3585     }
3586   }
3587   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3588   /* additional is (if any) */
3589   if (nis) {
3590     PetscMPIInt psum;
3591     PetscInt j;
3592     for (j=0,psum=0;j<nis;j++) {
3593       PetscInt plen;
3594       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3595       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3596       psum += len+1; /* indices + lenght */
3597     }
3598     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3599     for (j=0,psum=0;j<nis;j++) {
3600       PetscInt plen;
3601       const PetscInt *is_array_idxs;
3602       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3603       send_buffer_idxs_is[psum] = plen;
3604       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3605       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3606       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3607       psum += plen+1; /* indices + lenght */
3608     }
3609     for (i=0;i<n_sends;i++) {
3610       ilengths_idxs_is[is_indices[i]] = psum;
3611     }
3612     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3613   }
3614 
3615   buf_size_idxs = 0;
3616   buf_size_vals = 0;
3617   buf_size_idxs_is = 0;
3618   for (i=0;i<n_recvs;i++) {
3619     buf_size_idxs += (PetscInt)olengths_idxs[i];
3620     buf_size_vals += (PetscInt)olengths_vals[i];
3621     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3622   }
3623   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3624   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3625   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3626 
3627   /* get new tags for clean communications */
3628   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3629   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3630   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3631 
3632   /* allocate for requests */
3633   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3634   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3635   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3636   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3637   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3638   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3639 
3640   /* communications */
3641   ptr_idxs = recv_buffer_idxs;
3642   ptr_vals = recv_buffer_vals;
3643   ptr_idxs_is = recv_buffer_idxs_is;
3644   for (i=0;i<n_recvs;i++) {
3645     source_dest = onodes[i];
3646     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3647     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3648     ptr_idxs += olengths_idxs[i];
3649     ptr_vals += olengths_vals[i];
3650     if (nis) {
3651       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3652       ptr_idxs_is += olengths_idxs_is[i];
3653     }
3654   }
3655   for (i=0;i<n_sends;i++) {
3656     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3657     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3658     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3659     if (nis) {
3660       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3661     }
3662   }
3663   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3664   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3665 
3666   /* assemble new l2g map */
3667   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3668   ptr_idxs = recv_buffer_idxs;
3669   new_local_rows = 0;
3670   for (i=0;i<n_recvs;i++) {
3671     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3672     ptr_idxs += olengths_idxs[i];
3673   }
3674   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3675   ptr_idxs = recv_buffer_idxs;
3676   new_local_rows = 0;
3677   for (i=0;i<n_recvs;i++) {
3678     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3679     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3680     ptr_idxs += olengths_idxs[i];
3681   }
3682   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3683   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3684   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3685 
3686   /* infer new local matrix type from received local matrices type */
3687   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3688   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3689   if (n_recvs) {
3690     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3691     ptr_idxs = recv_buffer_idxs;
3692     for (i=0;i<n_recvs;i++) {
3693       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3694         new_local_type_private = MATAIJ_PRIVATE;
3695         break;
3696       }
3697       ptr_idxs += olengths_idxs[i];
3698     }
3699     switch (new_local_type_private) {
3700       case MATDENSE_PRIVATE:
3701         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3702           new_local_type = MATSEQAIJ;
3703           bs = 1;
3704         } else { /* if I receive only 1 dense matrix */
3705           new_local_type = MATSEQDENSE;
3706           bs = 1;
3707         }
3708         break;
3709       case MATAIJ_PRIVATE:
3710         new_local_type = MATSEQAIJ;
3711         bs = 1;
3712         break;
3713       case MATBAIJ_PRIVATE:
3714         new_local_type = MATSEQBAIJ;
3715         break;
3716       case MATSBAIJ_PRIVATE:
3717         new_local_type = MATSEQSBAIJ;
3718         break;
3719       default:
3720         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3721         break;
3722     }
3723   } else { /* by default, new_local_type is seqdense */
3724     new_local_type = MATSEQDENSE;
3725     bs = 1;
3726   }
3727 
3728   /* create MATIS object if needed */
3729   if (reuse == MAT_INITIAL_MATRIX) {
3730     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3731     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3732   } else {
3733     /* it also destroys the local matrices */
3734     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3735   }
3736   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3737   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3738 
3739   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3740 
3741   /* Global to local map of received indices */
3742   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3743   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3744   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3745 
3746   /* restore attributes -> type of incoming data and its size */
3747   buf_size_idxs = 0;
3748   for (i=0;i<n_recvs;i++) {
3749     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3750     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3751     buf_size_idxs += (PetscInt)olengths_idxs[i];
3752   }
3753   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3754 
3755   /* set preallocation */
3756   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3757   if (!newisdense) {
3758     PetscInt *new_local_nnz=0;
3759 
3760     ptr_vals = recv_buffer_vals;
3761     ptr_idxs = recv_buffer_idxs_local;
3762     if (n_recvs) {
3763       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3764     }
3765     for (i=0;i<n_recvs;i++) {
3766       PetscInt j;
3767       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3768         for (j=0;j<*(ptr_idxs+1);j++) {
3769           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3770         }
3771       } else {
3772         /* TODO */
3773       }
3774       ptr_idxs += olengths_idxs[i];
3775     }
3776     if (new_local_nnz) {
3777       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3778       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3779       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3780       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3781       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3782       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3783     } else {
3784       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3785     }
3786     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3787   } else {
3788     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3789   }
3790 
3791   /* set values */
3792   ptr_vals = recv_buffer_vals;
3793   ptr_idxs = recv_buffer_idxs_local;
3794   for (i=0;i<n_recvs;i++) {
3795     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3796       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3797       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3798       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3799       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3800       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3801     } else {
3802       /* TODO */
3803     }
3804     ptr_idxs += olengths_idxs[i];
3805     ptr_vals += olengths_vals[i];
3806   }
3807   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3808   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3809   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3810   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3811   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3812   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3813 
3814 #if 0
3815   if (!restrict_comm) { /* check */
3816     Vec       lvec,rvec;
3817     PetscReal infty_error;
3818 
3819     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3820     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3821     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3822     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3823     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3824     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3825     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3826     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3827     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3828   }
3829 #endif
3830 
3831   /* assemble new additional is (if any) */
3832   if (nis) {
3833     PetscInt **temp_idxs,*count_is,j,psum;
3834 
3835     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3836     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3837     ptr_idxs = recv_buffer_idxs_is;
3838     psum = 0;
3839     for (i=0;i<n_recvs;i++) {
3840       for (j=0;j<nis;j++) {
3841         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3842         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3843         psum += plen;
3844         ptr_idxs += plen+1; /* shift pointer to received data */
3845       }
3846     }
3847     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3848     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3849     for (i=1;i<nis;i++) {
3850       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3851     }
3852     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3853     ptr_idxs = recv_buffer_idxs_is;
3854     for (i=0;i<n_recvs;i++) {
3855       for (j=0;j<nis;j++) {
3856         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3857         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3858         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3859         ptr_idxs += plen+1; /* shift pointer to received data */
3860       }
3861     }
3862     for (i=0;i<nis;i++) {
3863       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3864       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3865       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3866     }
3867     ierr = PetscFree(count_is);CHKERRQ(ierr);
3868     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3869     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3870   }
3871   /* free workspace */
3872   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3873   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3874   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3875   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3876   if (isdense) {
3877     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3878     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3879   } else {
3880     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3881   }
3882   if (nis) {
3883     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3884     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3885   }
3886   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3887   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3888   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3889   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3890   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3891   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3892   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3893   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3894   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3895   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3896   ierr = PetscFree(onodes);CHKERRQ(ierr);
3897   if (nis) {
3898     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3899     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3900     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3901   }
3902   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3903   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3904     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3905     for (i=0;i<nis;i++) {
3906       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3907     }
3908   }
3909   PetscFunctionReturn(0);
3910 }
3911 
3912 /* temporary hack into ksp private data structure */
3913 #include <petsc/private/kspimpl.h>
3914 
3915 #undef __FUNCT__
3916 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3917 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3918 {
3919   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3920   PC_IS                  *pcis = (PC_IS*)pc->data;
3921   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3922   MatNullSpace           CoarseNullSpace=NULL;
3923   ISLocalToGlobalMapping coarse_islg;
3924   IS                     coarse_is,*isarray;
3925   PetscInt               i,im_active=-1,active_procs=-1;
3926   PetscInt               nis,nisdofs,nisneu;
3927   PC                     pc_temp;
3928   PCType                 coarse_pc_type;
3929   KSPType                coarse_ksp_type;
3930   PetscBool              multilevel_requested,multilevel_allowed;
3931   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3932   Mat                    t_coarse_mat_is;
3933   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3934   PetscMPIInt            all_procs;
3935   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3936   PetscBool              compute_vecs = PETSC_FALSE;
3937   PetscScalar            *array;
3938   PetscErrorCode         ierr;
3939 
3940   PetscFunctionBegin;
3941   /* Assign global numbering to coarse dofs */
3942   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3943     PetscInt ocoarse_size;
3944     compute_vecs = PETSC_TRUE;
3945     ocoarse_size = pcbddc->coarse_size;
3946     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3947     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3948     /* see if we can avoid some work */
3949     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3950       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3951         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3952         coarse_reuse = PETSC_FALSE;
3953       } else { /* we can safely reuse already computed coarse matrix */
3954         coarse_reuse = PETSC_TRUE;
3955       }
3956     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3957       coarse_reuse = PETSC_FALSE;
3958     }
3959     /* reset any subassembling information */
3960     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3961     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3962   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3963     coarse_reuse = PETSC_TRUE;
3964   }
3965 
3966   /* count "active" (i.e. with positive local size) and "void" processes */
3967   im_active = !!(pcis->n);
3968   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3969   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3970   void_procs = all_procs-active_procs;
3971   csin_type_simple = PETSC_TRUE;
3972   redist = PETSC_FALSE;
3973   if (pcbddc->current_level && void_procs) {
3974     csin_ml = PETSC_TRUE;
3975     ncoarse_ml = void_procs;
3976     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3977     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3978       csin_ds = PETSC_TRUE;
3979       ncoarse_ds = pcbddc->redistribute_coarse;
3980       redist = PETSC_TRUE;
3981     } else {
3982       csin_ds = PETSC_TRUE;
3983       ncoarse_ds = active_procs;
3984       redist = PETSC_TRUE;
3985     }
3986   } else {
3987     csin_ml = PETSC_FALSE;
3988     ncoarse_ml = all_procs;
3989     if (void_procs) {
3990       csin_ds = PETSC_TRUE;
3991       ncoarse_ds = void_procs;
3992       csin_type_simple = PETSC_FALSE;
3993     } else {
3994       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3995         csin_ds = PETSC_TRUE;
3996         ncoarse_ds = pcbddc->redistribute_coarse;
3997         redist = PETSC_TRUE;
3998       } else {
3999         csin_ds = PETSC_FALSE;
4000         ncoarse_ds = all_procs;
4001       }
4002     }
4003   }
4004 
4005   /*
4006     test if we can go multilevel: three conditions must be satisfied:
4007     - we have not exceeded the number of levels requested
4008     - we can actually subassemble the active processes
4009     - we can find a suitable number of MPI processes where we can place the subassembled problem
4010   */
4011   multilevel_allowed = PETSC_FALSE;
4012   multilevel_requested = PETSC_FALSE;
4013   if (pcbddc->current_level < pcbddc->max_levels) {
4014     multilevel_requested = PETSC_TRUE;
4015     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4016       multilevel_allowed = PETSC_FALSE;
4017     } else {
4018       multilevel_allowed = PETSC_TRUE;
4019     }
4020   }
4021   /* determine number of process partecipating to coarse solver */
4022   if (multilevel_allowed) {
4023     ncoarse = ncoarse_ml;
4024     csin = csin_ml;
4025     redist = PETSC_FALSE;
4026   } else {
4027     ncoarse = ncoarse_ds;
4028     csin = csin_ds;
4029   }
4030 
4031   /* creates temporary l2gmap and IS for coarse indexes */
4032   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4033   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4034 
4035   /* creates temporary MATIS object for coarse matrix */
4036   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4037   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4038   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4039   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4040 #if 0
4041   {
4042     PetscViewer viewer;
4043     char filename[256];
4044     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4045     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4046     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4047     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4048     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4049   }
4050 #endif
4051   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4052   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4053   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4054   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4055   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4056 
4057   /* compute dofs splitting and neumann boundaries for coarse dofs */
4058   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4059     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4060     const PetscInt         *idxs;
4061     ISLocalToGlobalMapping tmap;
4062 
4063     /* create map between primal indices (in local representative ordering) and local primal numbering */
4064     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4065     /* allocate space for temporary storage */
4066     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4067     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4068     /* allocate for IS array */
4069     nisdofs = pcbddc->n_ISForDofsLocal;
4070     nisneu = !!pcbddc->NeumannBoundariesLocal;
4071     nis = nisdofs + nisneu;
4072     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4073     /* dofs splitting */
4074     for (i=0;i<nisdofs;i++) {
4075       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4076       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4077       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4078       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4079       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4080       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4081       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4082       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4083     }
4084     /* neumann boundaries */
4085     if (pcbddc->NeumannBoundariesLocal) {
4086       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4087       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4088       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4089       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4090       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4091       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4092       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4093       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4094     }
4095     /* free memory */
4096     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4097     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4098     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4099   } else {
4100     nis = 0;
4101     nisdofs = 0;
4102     nisneu = 0;
4103     isarray = NULL;
4104   }
4105   /* destroy no longer needed map */
4106   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4107 
4108   /* restrict on coarse candidates (if needed) */
4109   coarse_mat_is = NULL;
4110   if (csin) {
4111     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4112       if (redist) {
4113         PetscMPIInt rank;
4114         PetscInt    spc,n_spc_p1,dest[1],destsize;
4115 
4116         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4117         spc = active_procs/ncoarse;
4118         n_spc_p1 = active_procs%ncoarse;
4119         if (im_active) {
4120           destsize = 1;
4121           if (rank > n_spc_p1*(spc+1)-1) {
4122             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4123           } else {
4124             dest[0] = rank/(spc+1);
4125           }
4126         } else {
4127           destsize = 0;
4128         }
4129         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4130       } else if (csin_type_simple) {
4131         PetscMPIInt rank;
4132         PetscInt    issize,isidx;
4133 
4134         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4135         if (im_active) {
4136           issize = 1;
4137           isidx = (PetscInt)rank;
4138         } else {
4139           issize = 0;
4140           isidx = -1;
4141         }
4142         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4143       } else { /* get a suitable subassembling pattern from MATIS code */
4144         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4145       }
4146 
4147       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4148       if (!redist || ncoarse <= void_procs) {
4149         PetscInt ncoarse_cand,tissize,*nisindices;
4150         PetscInt *coarse_candidates;
4151         const PetscInt* tisindices;
4152 
4153         /* get coarse candidates' ranks in pc communicator */
4154         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4155         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4156         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4157           if (!coarse_candidates[i]) {
4158             coarse_candidates[ncoarse_cand++]=i;
4159           }
4160         }
4161         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4162 
4163 
4164         if (pcbddc->dbg_flag) {
4165           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4166           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4167           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4168           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4169           for (i=0;i<ncoarse_cand;i++) {
4170             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4171           }
4172           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4173           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4174         }
4175         /* shift the pattern on coarse candidates */
4176         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4177         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4178         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4179         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4180         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4181         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4182         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4183       }
4184       if (pcbddc->dbg_flag) {
4185         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4186         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4187         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4188         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4189       }
4190     }
4191     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4192     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4193   } else {
4194     if (pcbddc->dbg_flag) {
4195       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4196       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4197       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4198     }
4199     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4200     coarse_mat_is = t_coarse_mat_is;
4201   }
4202 
4203   /* create local to global scatters for coarse problem */
4204   if (compute_vecs) {
4205     PetscInt lrows;
4206     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4207     if (coarse_mat_is) {
4208       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4209     } else {
4210       lrows = 0;
4211     }
4212     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4213     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4214     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4215     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4216     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4217   }
4218   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4219   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4220 
4221   /* set defaults for coarse KSP and PC */
4222   if (multilevel_allowed) {
4223     coarse_ksp_type = KSPRICHARDSON;
4224     coarse_pc_type = PCBDDC;
4225   } else {
4226     coarse_ksp_type = KSPPREONLY;
4227     coarse_pc_type = PCREDUNDANT;
4228   }
4229 
4230   /* print some info if requested */
4231   if (pcbddc->dbg_flag) {
4232     if (!multilevel_allowed) {
4233       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4234       if (multilevel_requested) {
4235         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4236       } else if (pcbddc->max_levels) {
4237         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4238       }
4239       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4240     }
4241   }
4242 
4243   /* create the coarse KSP object only once with defaults */
4244   if (coarse_mat_is) {
4245     MatReuse coarse_mat_reuse;
4246     PetscViewer dbg_viewer = NULL;
4247     if (pcbddc->dbg_flag) {
4248       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4249       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4250     }
4251     if (!pcbddc->coarse_ksp) {
4252       char prefix[256],str_level[16];
4253       size_t len;
4254       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4255       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4256       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4257       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4258       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4259       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4260       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4261       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4262       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4263       /* prefix */
4264       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4265       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4266       if (!pcbddc->current_level) {
4267         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4268         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4269       } else {
4270         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4271         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4272         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4273         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4274         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4275         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4276       }
4277       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4278       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4279       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4280       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4281       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4282       /* allow user customization */
4283       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4284     }
4285     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4286     if (nisdofs) {
4287       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4288       for (i=0;i<nisdofs;i++) {
4289         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4290       }
4291     }
4292     if (nisneu) {
4293       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4294       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4295     }
4296 
4297     /* get some info after set from options */
4298     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4299     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4300     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4301     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4302     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4303       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4304       isbddc = PETSC_FALSE;
4305     }
4306     if (isredundant) {
4307       KSP inner_ksp;
4308       PC inner_pc;
4309       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4310       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4311       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4312     }
4313 
4314     /* assemble coarse matrix */
4315     if (coarse_reuse) {
4316       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4317       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4318       coarse_mat_reuse = MAT_REUSE_MATRIX;
4319     } else {
4320       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4321     }
4322     if (isbddc || isnn) {
4323       if (pcbddc->coarsening_ratio > 1) {
4324         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4325           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4326           if (pcbddc->dbg_flag) {
4327             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4328             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4329             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4330             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4331           }
4332         }
4333         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4334       } else {
4335         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4336         coarse_mat = coarse_mat_is;
4337       }
4338     } else {
4339       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4340     }
4341     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4342 
4343     /* propagate symmetry info to coarse matrix */
4344     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4345     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4346 
4347     /* set operators */
4348     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4349     if (pcbddc->dbg_flag) {
4350       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4351     }
4352   } else { /* processes non partecipating to coarse solver (if any) */
4353     coarse_mat = 0;
4354   }
4355   ierr = PetscFree(isarray);CHKERRQ(ierr);
4356 #if 0
4357   {
4358     PetscViewer viewer;
4359     char filename[256];
4360     sprintf(filename,"coarse_mat.m");
4361     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4362     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4363     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4364     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4365   }
4366 #endif
4367 
4368   /* Compute coarse null space (special handling by BDDC only) */
4369   if (pcbddc->NullSpace) {
4370     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4371   }
4372 
4373   if (pcbddc->coarse_ksp) {
4374     Vec crhs,csol;
4375     PetscBool ispreonly;
4376     if (CoarseNullSpace) {
4377       if (isbddc) {
4378         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4379       } else {
4380         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4381       }
4382     }
4383     /* setup coarse ksp */
4384     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4385     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4386     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4387     /* hack */
4388     if (!csol) {
4389       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4390     }
4391     if (!crhs) {
4392       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4393     }
4394     /* Check coarse problem if in debug mode or if solving with an iterative method */
4395     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4396     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4397       KSP       check_ksp;
4398       KSPType   check_ksp_type;
4399       PC        check_pc;
4400       Vec       check_vec,coarse_vec;
4401       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4402       PetscInt  its;
4403       PetscBool compute_eigs;
4404       PetscReal *eigs_r,*eigs_c;
4405       PetscInt  neigs;
4406       const char *prefix;
4407 
4408       /* Create ksp object suitable for estimation of extreme eigenvalues */
4409       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4410       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4411       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4412       if (ispreonly) {
4413         check_ksp_type = KSPPREONLY;
4414         compute_eigs = PETSC_FALSE;
4415       } else {
4416         check_ksp_type = KSPGMRES;
4417         compute_eigs = PETSC_TRUE;
4418       }
4419       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4420       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4421       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4422       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4423       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4424       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4425       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4426       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4427       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4428       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4429       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4430       /* create random vec */
4431       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4432       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4433       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4434       if (CoarseNullSpace) {
4435         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4436       }
4437       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4438       /* solve coarse problem */
4439       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4440       if (CoarseNullSpace) {
4441         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4442       }
4443       /* set eigenvalue estimation if preonly has not been requested */
4444       if (compute_eigs) {
4445         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4446         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4447         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4448         lambda_max = eigs_r[neigs-1];
4449         lambda_min = eigs_r[0];
4450         if (pcbddc->use_coarse_estimates) {
4451           if (lambda_max>lambda_min) {
4452             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4453             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4454           }
4455         }
4456       }
4457 
4458       /* check coarse problem residual error */
4459       if (pcbddc->dbg_flag) {
4460         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4461         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4462         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4463         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4464         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4465         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4466         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4467         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4468         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4469         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4470         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4471         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4472         if (compute_eigs) {
4473           PetscReal lambda_max_s,lambda_min_s;
4474           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4475           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4476           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4477           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4478           for (i=0;i<neigs;i++) {
4479             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4480           }
4481         }
4482         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4483         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4484       }
4485       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4486       if (compute_eigs) {
4487         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4488         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4489       }
4490     }
4491   }
4492   /* print additional info */
4493   if (pcbddc->dbg_flag) {
4494     /* waits until all processes reaches this point */
4495     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4496     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4497     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4498   }
4499 
4500   /* free memory */
4501   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4502   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4503   PetscFunctionReturn(0);
4504 }
4505 
4506 #undef __FUNCT__
4507 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4508 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4509 {
4510   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4511   PC_IS*         pcis = (PC_IS*)pc->data;
4512   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4513   PetscInt       i,local_size,coarse_size=0;
4514   PetscInt       *local_primal_indices=NULL;
4515   PetscInt       *t_local_primal_indices=NULL;
4516   PetscErrorCode ierr;
4517 
4518   PetscFunctionBegin;
4519   /* Compute global number of coarse dofs */
4520   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4521     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4522   }
4523   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr);
4524   ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr);
4525   local_size = 0;
4526   for (i=0;i<pcbddc->local_primal_size_cc;i++) {
4527     PetscInt j;
4528     for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j;
4529   }
4530   ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr);
4531   if (local_size != pcbddc->local_primal_size) {
4532     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4533   }
4534 
4535   /* check numbering */
4536   if (pcbddc->dbg_flag) {
4537     PetscScalar coarsesum,*array;
4538     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4539 
4540     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4541     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4542     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4543     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4544     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4545     for (i=0;i<pcbddc->local_primal_size;i++) {
4546       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4547     }
4548     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4549     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4550     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4551     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4552     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4553     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4554     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4555     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4556     for (i=0;i<pcis->n;i++) {
4557       if (array[i] == 1.0) {
4558         set_error = PETSC_TRUE;
4559         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4560       }
4561     }
4562     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4563     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4564     for (i=0;i<pcis->n;i++) {
4565       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4566     }
4567     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4568     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4569     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4570     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4571     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4572     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4573     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4574       PetscInt *gidxs;
4575 
4576       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4577       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4578       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4579       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4580       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4581       for (i=0;i<pcbddc->local_primal_size;i++) {
4582         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4583       }
4584       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4585       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4586     }
4587     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4588     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4589   }
4590   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4591   /* get back data */
4592   *coarse_size_n = coarse_size;
4593   *local_primal_indices_n = local_primal_indices;
4594   PetscFunctionReturn(0);
4595 }
4596 
4597 #undef __FUNCT__
4598 #define __FUNCT__ "PCBDDCGlobalToLocal"
4599 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4600 {
4601   IS             localis_t;
4602   PetscInt       i,lsize,*idxs,n;
4603   PetscScalar    *vals;
4604   PetscErrorCode ierr;
4605 
4606   PetscFunctionBegin;
4607   /* get indices in local ordering exploiting local to global map */
4608   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4609   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4610   for (i=0;i<lsize;i++) vals[i] = 1.0;
4611   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4612   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4613   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4614   if (idxs) { /* multilevel guard */
4615     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4616   }
4617   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4618   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4619   ierr = PetscFree(vals);CHKERRQ(ierr);
4620   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4621   /* now compute set in local ordering */
4622   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4623   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4624   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4625   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4626   for (i=0,lsize=0;i<n;i++) {
4627     if (PetscRealPart(vals[i]) > 0.5) {
4628       lsize++;
4629     }
4630   }
4631   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4632   for (i=0,lsize=0;i<n;i++) {
4633     if (PetscRealPart(vals[i]) > 0.5) {
4634       idxs[lsize++] = i;
4635     }
4636   }
4637   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4638   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4639   *localis = localis_t;
4640   PetscFunctionReturn(0);
4641 }
4642 
4643 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4644 #undef __FUNCT__
4645 #define __FUNCT__ "PCBDDCMatMult_Private"
4646 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4647 {
4648   PCBDDCChange_ctx change_ctx;
4649   PetscErrorCode   ierr;
4650 
4651   PetscFunctionBegin;
4652   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4653   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4654   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4655   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4656   PetscFunctionReturn(0);
4657 }
4658 
4659 #undef __FUNCT__
4660 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4661 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4662 {
4663   PCBDDCChange_ctx change_ctx;
4664   PetscErrorCode   ierr;
4665 
4666   PetscFunctionBegin;
4667   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4668   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4669   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4670   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4671   PetscFunctionReturn(0);
4672 }
4673 
4674 #undef __FUNCT__
4675 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4676 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4677 {
4678   PC_IS               *pcis=(PC_IS*)pc->data;
4679   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4680   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4681   Mat                 S_j;
4682   PetscInt            *used_xadj,*used_adjncy;
4683   PetscBool           free_used_adj;
4684   PetscErrorCode      ierr;
4685 
4686   PetscFunctionBegin;
4687   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4688   free_used_adj = PETSC_FALSE;
4689   if (pcbddc->sub_schurs_layers == -1) {
4690     used_xadj = NULL;
4691     used_adjncy = NULL;
4692   } else {
4693     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4694       used_xadj = pcbddc->mat_graph->xadj;
4695       used_adjncy = pcbddc->mat_graph->adjncy;
4696     } else if (pcbddc->computed_rowadj) {
4697       used_xadj = pcbddc->mat_graph->xadj;
4698       used_adjncy = pcbddc->mat_graph->adjncy;
4699     } else {
4700       PetscBool      flg_row=PETSC_FALSE;
4701       const PetscInt *xadj,*adjncy;
4702       PetscInt       nvtxs;
4703 
4704       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4705       if (flg_row) {
4706         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4707         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4708         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4709         free_used_adj = PETSC_TRUE;
4710       } else {
4711         pcbddc->sub_schurs_layers = -1;
4712         used_xadj = NULL;
4713         used_adjncy = NULL;
4714       }
4715       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4716     }
4717   }
4718 
4719   /* setup sub_schurs data */
4720   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4721   if (!sub_schurs->use_mumps) {
4722     /* pcbddc->ksp_D up to date only if not using MUMPS */
4723     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4724     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4725   } else {
4726     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4727     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4728   }
4729   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4730 
4731   /* free adjacency */
4732   if (free_used_adj) {
4733     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4734   }
4735   PetscFunctionReturn(0);
4736 }
4737 
4738 #undef __FUNCT__
4739 #define __FUNCT__ "PCBDDCInitSubSchurs"
4740 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4741 {
4742   PC_IS               *pcis=(PC_IS*)pc->data;
4743   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4744   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4745   PCBDDCGraph         graph;
4746   PetscErrorCode      ierr;
4747 
4748   PetscFunctionBegin;
4749   /* attach interface graph for determining subsets */
4750   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4751     IS verticesIS;
4752 
4753     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4754     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4755     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4756     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4757     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4758     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4759 /*
4760     if (pcbddc->dbg_flag) {
4761       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4762     }
4763 */
4764   } else {
4765     graph = pcbddc->mat_graph;
4766   }
4767 
4768   /* sub_schurs init */
4769   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4770 
4771   /* free graph struct */
4772   if (pcbddc->sub_schurs_rebuild) {
4773     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4774   }
4775   PetscFunctionReturn(0);
4776 }
4777