xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision e28d306cefdfc9732ab6ae67ada7c2d6ef2a32cd)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size;
76       cum2 += subset_size*n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
112     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125                       nv+cum,&pcbddc->adaptive_constraints_idxs,
126                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
127   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129   maxneigs = 0;
130   cum = cum2 = cumarray = 0;
131   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132   pcbddc->adaptive_constraints_data_ptr[0] = 0;
133   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
134     const PetscInt *idxs;
135 
136     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
137     for (cum=0;cum<nv;cum++) {
138       pcbddc->adaptive_constraints_n[cum] = 1;
139       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140       pcbddc->adaptive_constraints_data[cum] = 1.0;
141       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143     }
144     cum2 = cum;
145     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
146   }
147 
148   if (mss) { /* multilevel */
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
150     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
151   }
152 
153   for (i=0;i<sub_schurs->n_subs;i++) {
154     PetscInt j,subset_size;
155 
156     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
157     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
158       const PetscInt *idxs;
159       PetscReal      infty = PETSC_MAX_REAL;
160       PetscInt       eigs_start = 0;
161       PetscBLASInt   B_N;
162 
163       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
164       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
165         if (sub_schurs->is_hermitian) {
166           PetscInt j;
167           for (j=0;j<subset_size;j++) {
168             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
169           }
170           for (j=0;j<subset_size;j++) {
171             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
172           }
173         } else {
174           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
175           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
176         }
177       } else {
178         S = Sarray + cumarray;
179         St = Starray + cumarray;
180       }
181 
182       /* Threshold: this is an heuristic for edges */
183       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
184       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
185 
186       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
187         PetscBLASInt B_itype = 1;
188         PetscBLASInt B_IL, B_IU;
189         PetscReal    eps = -1.0; /* dlamch? */
190         PetscInt     nmin_s;
191 
192         /* ask for eigenvalues larger than thresh */
193         if (pcbddc->dbg_flag) {
194           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
195         }
196         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
197 #if defined(PETSC_USE_COMPLEX)
198         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
199 #else
200         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
201 #endif
202         ierr = PetscFPTrapPop();CHKERRQ(ierr);
203         if (B_ierr) {
204           if (B_ierr < 0 ) {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
206           } else if (B_ierr <= B_N) {
207             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
208           } else {
209             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
210           }
211         }
212 
213         if (B_neigs > nmax) {
214           if (pcbddc->dbg_flag) {
215             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
216           }
217           eigs_start = B_neigs -nmax;
218           B_neigs = nmax;
219         }
220 
221         nmin_s = PetscMin(nmin,B_N);
222         if (B_neigs < nmin_s) {
223           PetscBLASInt B_neigs2;
224 
225           B_IU = B_N - B_neigs;
226           B_IL = B_N - nmin_s + 1;
227           if (pcbddc->dbg_flag) {
228             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
229           }
230           if (sub_schurs->is_hermitian) {
231             PetscInt j;
232             for (j=0;j<subset_size;j++) {
233               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
234             }
235             for (j=0;j<subset_size;j++) {
236               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
237             }
238           } else {
239             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
240             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
241           }
242           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
243 #if defined(PETSC_USE_COMPLEX)
244           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
245 #else
246           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
247 #endif
248           ierr = PetscFPTrapPop();CHKERRQ(ierr);
249           B_neigs += B_neigs2;
250         }
251         if (B_ierr) {
252           if (B_ierr < 0 ) {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
254           } else if (B_ierr <= B_N) {
255             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
256           } else {
257             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
258           }
259         }
260         if (pcbddc->dbg_flag) {
261           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
262           for (j=0;j<B_neigs;j++) {
263             if (eigs[j] == 0.0) {
264               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
265             } else {
266               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
267             }
268           }
269         }
270       } else {
271           /* TODO */
272       }
273 
274       maxneigs = PetscMax(B_neigs,maxneigs);
275       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
276       if (B_neigs) {
277         ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
278 
279         if (pcbddc->dbg_flag > 1) {
280           PetscInt ii;
281           for (ii=0;ii<B_neigs;ii++) {
282             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
283             for (j=0;j<B_N;j++) {
284               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
285             }
286           }
287         }
288 #if 0
289         for (j=0;j<B_neigs;j++) {
290           PetscBLASInt Blas_N,Blas_one = 1.0;
291           PetscScalar norm;
292           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
293           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
294                                                      &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
295           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
296             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
297           } else {
298             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
299           }
300           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
301         }
302 #endif
303         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
304         pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
305         pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
306         cum++;
307       }
308       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
309     }
310     /* shift for next computation */
311     cumarray += subset_size*subset_size;
312   }
313   if (pcbddc->dbg_flag) {
314     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
315   }
316 
317   if (mss) {
318     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
319     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
320     /* destroy matrices (junk) */
321     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
322     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
323   }
324   if (allocated_S_St) {
325     ierr = PetscFree2(S,St);CHKERRQ(ierr);
326   }
327   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
328 #if defined(PETSC_USE_COMPLEX)
329   ierr = PetscFree(rwork);CHKERRQ(ierr);
330 #endif
331   if (pcbddc->dbg_flag) {
332     PetscInt maxneigs_r;
333     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
334     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
335   }
336   PetscFunctionReturn(0);
337 }
338 
339 #undef __FUNCT__
340 #define __FUNCT__ "PCBDDCSetUpSolvers"
341 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
342 {
343   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
344   PetscScalar    *coarse_submat_vals;
345   PetscErrorCode ierr;
346 
347   PetscFunctionBegin;
348   /* Setup local scatters R_to_B and (optionally) R_to_D */
349   /* PCBDDCSetUpLocalWorkVectors should be called first! */
350   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
351 
352   /* Setup local neumann solver ksp_R */
353   /* PCBDDCSetUpLocalScatters should be called first! */
354   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
355 
356   /* Change global null space passed in by the user if change of basis has been requested */
357   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
358     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
359   }
360 
361   /*
362      Setup local correction and local part of coarse basis.
363      Gives back the dense local part of the coarse matrix in column major ordering
364   */
365   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
366 
367   /* Compute total number of coarse nodes and setup coarse solver */
368   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
369 
370   /* free */
371   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "PCBDDCResetCustomization"
377 PetscErrorCode PCBDDCResetCustomization(PC pc)
378 {
379   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
384   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
385   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
387   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
388   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
389   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
390   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
391   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
392   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
393   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 #undef __FUNCT__
398 #define __FUNCT__ "PCBDDCResetTopography"
399 PetscErrorCode PCBDDCResetTopography(PC pc)
400 {
401   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
402   PetscErrorCode ierr;
403 
404   PetscFunctionBegin;
405   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
406   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
407   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
408   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
409   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "PCBDDCResetSolvers"
415 PetscErrorCode PCBDDCResetSolvers(PC pc)
416 {
417   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
418   PetscScalar    *array;
419   PetscErrorCode ierr;
420 
421   PetscFunctionBegin;
422   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
423   if (pcbddc->coarse_phi_B) {
424     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
425     ierr = PetscFree(array);CHKERRQ(ierr);
426   }
427   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
428   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
429   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
430   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
431   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
432   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
434   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
436   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
437   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
440   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
443   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
444   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
446   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
447   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
448   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
449   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
455 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
456 {
457   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
458   PC_IS          *pcis = (PC_IS*)pc->data;
459   VecType        impVecType;
460   PetscInt       n_constraints,n_R,old_size;
461   PetscErrorCode ierr;
462 
463   PetscFunctionBegin;
464   if (!pcbddc->ConstraintMatrix) {
465     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
466   }
467   /* get sizes */
468   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
469   n_R = pcis->n-pcbddc->n_vertices;
470   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
471   /* local work vectors (try to avoid unneeded work)*/
472   /* R nodes */
473   old_size = -1;
474   if (pcbddc->vec1_R) {
475     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
476   }
477   if (n_R != old_size) {
478     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
480     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
481     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
482     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
483     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
484   }
485   /* local primal dofs */
486   old_size = -1;
487   if (pcbddc->vec1_P) {
488     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
489   }
490   if (pcbddc->local_primal_size != old_size) {
491     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
492     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
493     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
494     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
495   }
496   /* local explicit constraints */
497   old_size = -1;
498   if (pcbddc->vec1_C) {
499     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
500   }
501   if (n_constraints && n_constraints != old_size) {
502     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
503     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
504     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
505     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
506   }
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "PCBDDCSetUpCorrection"
512 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
513 {
514   PetscErrorCode  ierr;
515   /* pointers to pcis and pcbddc */
516   PC_IS*          pcis = (PC_IS*)pc->data;
517   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
518   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
519   /* submatrices of local problem */
520   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
521   /* submatrices of local coarse problem */
522   Mat             S_VV,S_CV,S_VC,S_CC;
523   /* working matrices */
524   Mat             C_CR;
525   /* additional working stuff */
526   PC              pc_R;
527   Mat             F;
528   PetscBool       isLU,isCHOL,isILU;
529 
530   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
531   PetscScalar     *work;
532   PetscInt        *idx_V_B;
533   PetscInt        n,n_vertices,n_constraints;
534   PetscInt        i,n_R,n_D,n_B;
535   PetscBool       unsymmetric_check;
536   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
537   MatType         impMatType;
538   /* some shortcuts to scalars */
539   PetscScalar     one=1.0,m_one=-1.0;
540 
541   PetscFunctionBegin;
542   n_vertices = pcbddc->n_vertices;
543   n_constraints = pcbddc->local_primal_size-n_vertices;
544   /* Set Non-overlapping dimensions */
545   n_B = pcis->n_B;
546   n_D = pcis->n - n_B;
547   n_R = pcis->n - n_vertices;
548 
549   /* Set types for local objects needed by BDDC precondtioner */
550   impMatType = MATSEQDENSE;
551 
552   /* vertices in boundary numbering */
553   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
554   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
555   if (i != n_vertices) {
556     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
557   }
558 
559   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
560   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
567   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
568   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
569 
570   unsymmetric_check = PETSC_FALSE;
571   /* allocate workspace */
572   n = 0;
573   if (n_constraints) {
574     n += n_R*n_constraints;
575   }
576   if (n_vertices) {
577     n = PetscMax(2*n_R*n_vertices,n);
578     n = PetscMax((n_R+n_B)*n_vertices,n);
579   }
580   if (!pcbddc->issym) {
581     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
582     unsymmetric_check = PETSC_TRUE;
583   }
584   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
585 
586   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
587   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
588   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
589   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
590   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
591   if (isLU || isILU || isCHOL) {
592     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
593   } else if (sub_schurs->reuse_mumps) {
594     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
595     MatFactorType type;
596 
597     F = reuse_mumps->F;
598     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
599     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
600   } else {
601     F = NULL;
602   }
603 
604   /* Precompute stuffs needed for preprocessing and application of BDDC*/
605   if (n_constraints) {
606     Mat         M1,M2,M3;
607     Mat         auxmat;
608     IS          is_aux;
609     PetscScalar *array,*array2;
610 
611     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
612     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
613 
614     /* Extract constraints on R nodes: C_{CR}  */
615     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
616     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
617     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
618 
619     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
620     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
621     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
622     for (i=0;i<n_constraints;i++) {
623       const PetscScalar *row_cmat_values;
624       const PetscInt    *row_cmat_indices;
625       PetscInt          size_of_constraint,j;
626 
627       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
628       for (j=0;j<size_of_constraint;j++) {
629         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
630       }
631       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
632     }
633     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
634     if (F) {
635       Mat B;
636 
637       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
638       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
639       ierr = MatDestroy(&B);CHKERRQ(ierr);
640     } else {
641       PetscScalar *marr;
642 
643       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
644       for (i=0;i<n_constraints;i++) {
645         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
646         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
647         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
648         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
649         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
650       }
651       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
652     }
653     if (!pcbddc->switch_static) {
654       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
655       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
656       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
657       for (i=0;i<n_constraints;i++) {
658         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
659         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
660         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
661         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
662         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
663         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
664       }
665       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
666       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
667       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
668     } else {
669       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
670       pcbddc->local_auxmat2 = local_auxmat2_R;
671       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
672     }
673     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
674     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
675     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
676     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
677     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
678     if (isCHOL) {
679       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
680     } else {
681       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
682     }
683     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
684     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
685     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
686     ierr = MatDestroy(&M2);CHKERRQ(ierr);
687     ierr = MatDestroy(&M3);CHKERRQ(ierr);
688     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
689     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
690     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
691     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
692     ierr = MatDestroy(&M1);CHKERRQ(ierr);
693   }
694   /* Get submatrices from subdomain matrix */
695   if (n_vertices) {
696     Mat       newmat;
697     IS        is_aux;
698     PetscInt  ibs,mbs;
699     PetscBool issbaij;
700 
701     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
702       IS tis;
703 
704       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
705       ierr = ISSort(tis);CHKERRQ(ierr);
706       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
707       ierr = ISDestroy(&tis);CHKERRQ(ierr);
708     } else {
709       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
710     }
711     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
712     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
713     if (ibs != mbs) { /* need to convert to SEQAIJ */
714       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
715       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
716       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
717       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
718       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
719       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
720     } else {
721       /* this is safe */
722       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
723       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
724       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
725         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
726         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
727         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
728         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
729         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
730         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
731       } else {
732         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
733         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
734         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
735       }
736       /* need to convert to SeqAIJ for MatMatMult with dense matrix */
737       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
738     }
739     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
740   }
741 
742   /* Matrix of coarse basis functions (local) */
743   if (pcbddc->coarse_phi_B) {
744     PetscInt on_B,on_primal,on_D=n_D;
745     if (pcbddc->coarse_phi_D) {
746       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
747     }
748     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
749     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
750       PetscScalar *marray;
751 
752       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
753       ierr = PetscFree(marray);CHKERRQ(ierr);
754       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
755       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
756       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
757       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
758     }
759   }
760 
761   if (!pcbddc->coarse_phi_B) {
762     PetscScalar *marray;
763 
764     n = n_B*pcbddc->local_primal_size;
765     if (pcbddc->switch_static || pcbddc->dbg_flag) {
766       n += n_D*pcbddc->local_primal_size;
767     }
768     if (!pcbddc->issym) {
769       n *= 2;
770     }
771     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
772     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
773     n = n_B*pcbddc->local_primal_size;
774     if (pcbddc->switch_static || pcbddc->dbg_flag) {
775       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
776       n += n_D*pcbddc->local_primal_size;
777     }
778     if (!pcbddc->issym) {
779       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
780       if (pcbddc->switch_static || pcbddc->dbg_flag) {
781         n = n_B*pcbddc->local_primal_size;
782         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
783       }
784     } else {
785       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
786       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
787       if (pcbddc->switch_static || pcbddc->dbg_flag) {
788         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
789         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
790       }
791     }
792   }
793   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
794   /* vertices */
795   if (n_vertices) {
796 
797     if (n_R) {
798       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
799       PetscBLASInt B_N,B_one = 1;
800       PetscScalar  *x,*y;
801 
802       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
803       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
804       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
805       if (F) { /* TODO could be optimized for symmetric problems */
806         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
807       } else {
808         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
809         for (i=0;i<n_vertices;i++) {
810           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
811           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
812           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
813           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
814           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
815         }
816         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
817       }
818       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
819       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
820       if (n_constraints) {
821         Mat B;
822 
823         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
824         for (i=0;i<n_vertices;i++) {
825           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
826           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
827           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
830           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
831         }
832         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
833         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
834         ierr = MatDestroy(&B);CHKERRQ(ierr);
835         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
836         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
837         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
838         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
839         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
840         ierr = MatDestroy(&B);CHKERRQ(ierr);
841       }
842       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
843       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
844       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
845       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
846       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
847       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
848       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
849       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
850       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
851       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
852     } else {
853       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
854     }
855     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
856     /* coarse basis functions */
857     for (i=0;i<n_vertices;i++) {
858       PetscScalar *y;
859 
860       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
861       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
862       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
863       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
864       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
865       y[n_B*i+idx_V_B[i]] = 1.0;
866       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
867       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
868 
869       if (pcbddc->switch_static || pcbddc->dbg_flag) {
870         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
871         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
872         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
875         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
876       }
877       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
878     }
879   }
880 
881   if (n_constraints) {
882     Mat B;
883 
884     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
885     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
886     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
887     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
888     if (n_vertices) {
889       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
890         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
891       } else {
892         Mat S_VCt;
893 
894         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
895         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
896         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
897       }
898     }
899     ierr = MatDestroy(&B);CHKERRQ(ierr);
900     /* coarse basis functions */
901     for (i=0;i<n_constraints;i++) {
902       PetscScalar *y;
903 
904       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
905       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
906       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
907       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
908       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
910       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
911       if (pcbddc->switch_static || pcbddc->dbg_flag) {
912         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
913         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
914         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
917         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
918       }
919       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
920     }
921   }
922   if (n_constraints) {
923     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
924   }
925 
926   /* compute other basis functions for non-symmetric problems */
927   if (!pcbddc->issym) {
928 
929     if (n_constraints) {
930       Mat S_CCT,B_C;
931 
932       /* this is a lazy thing */
933       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
934       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
935       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
936       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
937       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
938       if (n_vertices) {
939         Mat B_V,S_VCT;
940 
941         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
942         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
943         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
944         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
945         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
946       }
947       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
948     } else { /* if there are no constraints, reset work */
949       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
950     }
951     if (n_vertices && n_R) {
952       Mat          A_VRT;
953       PetscScalar  *marray;
954       PetscBLASInt B_N,B_one = 1;
955 
956       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
957       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
958       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
959       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
960       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
961       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
962       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
963     }
964 
965     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
966       for (i=0;i<pcbddc->local_primal_size;i++) {
967         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
968         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
969         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
970         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
971         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
972       }
973     } else {
974       for (i=0;i<pcbddc->local_primal_size;i++) {
975         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
976         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
977         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
978         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
979         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
980       }
981     }
982     /* coarse basis functions */
983     for (i=0;i<pcbddc->local_primal_size;i++) {
984       PetscScalar *y;
985 
986       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
987       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
988       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
989       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991       if (i<n_vertices) {
992         y[n_B*i+idx_V_B[i]] = 1.0;
993       }
994       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
995       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
996 
997       if (pcbddc->switch_static || pcbddc->dbg_flag) {
998         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
999         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1000         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1003         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1004       }
1005       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1006     }
1007   }
1008   /* free memory */
1009   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1010   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1011   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1012   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1013   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1014   ierr = PetscFree(work);CHKERRQ(ierr);
1015   if (n_vertices) {
1016     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1017   }
1018   if (n_constraints) {
1019     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1020   }
1021   /* Checking coarse_sub_mat and coarse basis functios */
1022   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1023   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1024   if (pcbddc->dbg_flag) {
1025     Mat         coarse_sub_mat;
1026     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1027     Mat         coarse_phi_D,coarse_phi_B;
1028     Mat         coarse_psi_D,coarse_psi_B;
1029     Mat         A_II,A_BB,A_IB,A_BI;
1030     Mat         C_B,CPHI;
1031     IS          is_dummy;
1032     Vec         mones;
1033     MatType     checkmattype=MATSEQAIJ;
1034     PetscReal   real_value;
1035 
1036     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1037     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1038     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1039     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1040     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1041     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1042     if (unsymmetric_check) {
1043       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1044       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1045     }
1046     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1047 
1048     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1049     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
1050     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1051     if (unsymmetric_check) {
1052       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1053       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1054       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1055       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1056       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1057       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1058       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1059       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1060       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1061       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1062       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1063       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1064     } else {
1065       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1066       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1067       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1068       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1069       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1070       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1071       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1072       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1073     }
1074     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1075     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1076     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1077     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1078     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1079     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1080     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1081     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1082 
1083     /* check constraints */
1084     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1085     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1086     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1087     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1088     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1089     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1090     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1091     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1092     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1093     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1094     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1095     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1096 
1097     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1098     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1099     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1100     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1101     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1102     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1103     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1104     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1105     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1106     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1107     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1108     if (unsymmetric_check) {
1109       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1110       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1111     }
1112     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1113   }
1114   /* get back data */
1115   *coarse_submat_vals_n = coarse_submat_vals;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1121 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1122 {
1123   Mat            *work_mat;
1124   IS             isrow_s,iscol_s;
1125   PetscBool      rsorted,csorted;
1126   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1131   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1132   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1133   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1134 
1135   if (!rsorted) {
1136     const PetscInt *idxs;
1137     PetscInt *idxs_sorted,i;
1138 
1139     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1140     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1141     for (i=0;i<rsize;i++) {
1142       idxs_perm_r[i] = i;
1143     }
1144     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1145     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1146     for (i=0;i<rsize;i++) {
1147       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1148     }
1149     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1150     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1151   } else {
1152     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1153     isrow_s = isrow;
1154   }
1155 
1156   if (!csorted) {
1157     if (isrow == iscol) {
1158       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1159       iscol_s = isrow_s;
1160     } else {
1161       const PetscInt *idxs;
1162       PetscInt *idxs_sorted,i;
1163 
1164       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1165       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1166       for (i=0;i<csize;i++) {
1167         idxs_perm_c[i] = i;
1168       }
1169       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1170       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1171       for (i=0;i<csize;i++) {
1172         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1173       }
1174       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1175       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1176     }
1177   } else {
1178     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1179     iscol_s = iscol;
1180   }
1181 
1182   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1183 
1184   if (!rsorted || !csorted) {
1185     Mat      new_mat;
1186     IS       is_perm_r,is_perm_c;
1187 
1188     if (!rsorted) {
1189       PetscInt *idxs_r,i;
1190       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1191       for (i=0;i<rsize;i++) {
1192         idxs_r[idxs_perm_r[i]] = i;
1193       }
1194       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1195       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1196     } else {
1197       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1198     }
1199     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1200 
1201     if (!csorted) {
1202       if (isrow_s == iscol_s) {
1203         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1204         is_perm_c = is_perm_r;
1205       } else {
1206         PetscInt *idxs_c,i;
1207         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1208         for (i=0;i<csize;i++) {
1209           idxs_c[idxs_perm_c[i]] = i;
1210         }
1211         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1212         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1213       }
1214     } else {
1215       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1216     }
1217     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1218 
1219     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1220     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1221     work_mat[0] = new_mat;
1222     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1223     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1224   }
1225 
1226   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1227   *B = work_mat[0];
1228   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1229   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1230   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1236 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1237 {
1238   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1239   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1240   Mat            new_mat;
1241   IS             is_local,is_global;
1242   PetscInt       local_size;
1243   PetscBool      isseqaij;
1244   PetscErrorCode ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1248   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1249   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1250   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1251   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1252   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1253   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1254 
1255   /* check */
1256   if (pcbddc->dbg_flag) {
1257     Vec       x,x_change;
1258     PetscReal error;
1259 
1260     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1261     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1262     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1263     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1266     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1267     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1268     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1269     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1270     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1271     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1272     ierr = VecDestroy(&x);CHKERRQ(ierr);
1273     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1274   }
1275 
1276   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1277   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1278   if (isseqaij) {
1279     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1280   } else {
1281     Mat work_mat;
1282     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1283     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1284     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1285   }
1286   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1287 #if !defined(PETSC_USE_COMPLEX)
1288   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1289 #endif
1290   /*
1291   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1292   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1293   */
1294   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #undef __FUNCT__
1299 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1300 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1301 {
1302   PC_IS*          pcis = (PC_IS*)(pc->data);
1303   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1304   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1305   IS              is_B_reuse_mumps=NULL;
1306   PetscInt        *idx_R_local;
1307   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1308   PetscInt        vbs,bs;
1309   PetscBT         bitmask=NULL;
1310   PetscErrorCode  ierr;
1311 
1312   PetscFunctionBegin;
1313   /*
1314     No need to setup local scatters if
1315       - primal space is unchanged
1316         AND
1317       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1318         AND
1319       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1320   */
1321   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1322     PetscFunctionReturn(0);
1323   }
1324   /* destroy old objects */
1325   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1326   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1327   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1328   /* Set Non-overlapping dimensions */
1329   n_B = pcis->n_B;
1330   n_D = pcis->n - n_B;
1331   n_vertices = pcbddc->n_vertices;
1332 
1333   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1334   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1335 
1336   /* create auxiliary bitmask */
1337   if (!sub_schurs->reuse_mumps) {
1338     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1339     for (i=0;i<n_vertices;i++) {
1340       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1341     }
1342 
1343     for (i=0, n_R=0; i<pcis->n; i++) {
1344       if (!PetscBTLookup(bitmask,i)) {
1345         idx_R_local[n_R++] = i;
1346       }
1347     }
1348   } else { /* A different ordering and auxiliary scatters are needed we are reusing MUMPS Schur solver */
1349     PCBDDCReuseMumps       reuse_mumps = sub_schurs->reuse_mumps;
1350     const PetscInt         *idxs;
1351     PetscInt               schur_size;
1352     IS                     list[2],tisB_N,tisR;
1353 
1354     if (sub_schurs->is_dir) {
1355       list[0] = sub_schurs->is_Ej_all;
1356       list[1] = sub_schurs->is_dir;
1357       ierr = ISGlobalToLocalMappingApplyIS(pcis->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_dir,&list[1]);CHKERRQ(ierr);
1358       ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&is_B_reuse_mumps);CHKERRQ(ierr);
1359       ierr = ISDestroy(&list[1]);CHKERRQ(ierr);
1360     } else {
1361       ierr = ISDuplicate(sub_schurs->is_Ej_all,&is_B_reuse_mumps);CHKERRQ(ierr);
1362     }
1363     ierr = ISGetLocalSize(is_B_reuse_mumps,&schur_size);CHKERRQ(ierr);
1364     ierr = VecScatterDestroy(&reuse_mumps->correction_scatter_B);CHKERRQ(ierr);
1365     ierr = VecScatterCreate(pcis->vec1_B,is_B_reuse_mumps,reuse_mumps->solB,NULL,&reuse_mumps->correction_scatter_B);CHKERRQ(ierr);
1366     ierr = ISLocalToGlobalMappingApplyIS(sub_schurs->BtoNmap,is_B_reuse_mumps,&tisB_N);CHKERRQ(ierr);
1367     list[0] = pcis->is_I_local;
1368     list[1] = tisB_N;
1369     ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&tisR);CHKERRQ(ierr);
1370     ierr = ISDestroy(&tisB_N);CHKERRQ(ierr);
1371     ierr = ISGetLocalSize(tisR,&n_R);CHKERRQ(ierr);
1372     ierr = ISGetIndices(tisR,&idxs);CHKERRQ(ierr);
1373     ierr = PetscMemcpy(idx_R_local,idxs,n_R*sizeof(PetscInt));CHKERRQ(ierr);
1374     ierr = ISRestoreIndices(tisR,&idxs);CHKERRQ(ierr);
1375     ierr = ISDestroy(&tisR);CHKERRQ(ierr);
1376   }
1377 
1378   /* Block code */
1379   vbs = 1;
1380   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1381   if (bs>1 && !(n_vertices%bs)) {
1382     PetscBool is_blocked = PETSC_TRUE;
1383     PetscInt  *vary;
1384     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1385     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1386     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1387     for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1388     for (i=0; i<n_vertices/bs; i++) {
1389       if (vary[i]!=0 && vary[i]!=bs) {
1390         is_blocked = PETSC_FALSE;
1391         break;
1392       }
1393     }
1394     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1395       vbs = bs;
1396       for (i=0;i<n_R/vbs;i++) {
1397         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1398       }
1399     }
1400     ierr = PetscFree(vary);CHKERRQ(ierr);
1401   }
1402   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1403   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1404 
1405   /* print some info if requested */
1406   if (pcbddc->dbg_flag) {
1407     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1408     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1409     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1410     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1411     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1412     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1413     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1414   }
1415 
1416   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1417   if (!sub_schurs->reuse_mumps) {
1418     IS       is_aux1,is_aux2;
1419     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1420 
1421     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1422     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1423     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1424     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1425     for (i=0; i<n_D; i++) {
1426       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1427     }
1428     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1429     for (i=0, j=0; i<n_R; i++) {
1430       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1431         aux_array1[j++] = i;
1432       }
1433     }
1434     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1435     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1436     for (i=0, j=0; i<n_B; i++) {
1437       if (!PetscBTLookup(bitmask,is_indices[i])) {
1438         aux_array2[j++] = i;
1439       }
1440     }
1441     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1442     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1443     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1444     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1445     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1446 
1447     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1448       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1449       for (i=0, j=0; i<n_R; i++) {
1450         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1451           aux_array1[j++] = i;
1452         }
1453       }
1454       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1455       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1456       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1457     }
1458     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1459     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1460   } else {
1461     IS       tis;
1462     PetscInt schur_size;
1463 
1464     ierr = ISGetLocalSize(is_B_reuse_mumps,&schur_size);CHKERRQ(ierr);
1465     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1466     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,is_B_reuse_mumps,&pcbddc->R_to_B);CHKERRQ(ierr);
1467     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1468     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1469       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1470       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1471       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1472     }
1473   }
1474   ierr = ISDestroy(&is_B_reuse_mumps);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1481 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1482 {
1483   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1484   PC_IS          *pcis = (PC_IS*)pc->data;
1485   PC             pc_temp;
1486   Mat            A_RR;
1487   MatReuse       reuse;
1488   PetscScalar    m_one = -1.0;
1489   PetscReal      value;
1490   PetscInt       n_D,n_R,ibs,mbs;
1491   PetscBool      use_exact,use_exact_reduced,issbaij;
1492   PetscErrorCode ierr;
1493   /* prefixes stuff */
1494   char           dir_prefix[256],neu_prefix[256],str_level[16];
1495   size_t         len;
1496 
1497   PetscFunctionBegin;
1498 
1499   /* compute prefixes */
1500   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1501   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1502   if (!pcbddc->current_level) {
1503     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1504     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1505     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1506     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1507   } else {
1508     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1509     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1510     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1511     len -= 15; /* remove "pc_bddc_coarse_" */
1512     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1513     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1514     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1515     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1516     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1517     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1518     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1519     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1520   }
1521 
1522   /* DIRICHLET PROBLEM */
1523   if (dirichlet) {
1524     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1525     if (pcbddc->issym) {
1526       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1527     }
1528     /* Matrix for Dirichlet problem is pcis->A_II */
1529     n_D = pcis->n - pcis->n_B;
1530     if (!pcbddc->ksp_D) { /* create object if not yet build */
1531       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1532       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1533       /* default */
1534       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1535       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1536       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1537       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1538       if (issbaij) {
1539         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1540       } else {
1541         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1542       }
1543       /* Allow user's customization */
1544       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1545       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1546     }
1547     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1548     if (sub_schurs->reuse_mumps) {
1549       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1550 
1551       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1552     }
1553     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1554     if (!n_D) {
1555       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1556       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1557     }
1558     /* Set Up KSP for Dirichlet problem of BDDC */
1559     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1560     /* set ksp_D into pcis data */
1561     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1562     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1563     pcis->ksp_D = pcbddc->ksp_D;
1564   }
1565 
1566   /* NEUMANN PROBLEM */
1567   A_RR = 0;
1568   if (neumann) {
1569     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1570     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1571     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1572     if (pcbddc->ksp_R) { /* already created ksp */
1573       PetscInt nn_R;
1574       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1575       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1576       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1577       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1578         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1579         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1580         reuse = MAT_INITIAL_MATRIX;
1581       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1582         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1583           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1584           reuse = MAT_INITIAL_MATRIX;
1585         } else { /* safe to reuse the matrix */
1586           reuse = MAT_REUSE_MATRIX;
1587         }
1588       }
1589       /* last check */
1590       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1591         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1592         reuse = MAT_INITIAL_MATRIX;
1593       }
1594     } else { /* first time, so we need to create the matrix */
1595       reuse = MAT_INITIAL_MATRIX;
1596     }
1597     /* extract A_RR */
1598     if (!sub_schurs->reuse_mumps) {
1599       ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1600       ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1601       if (ibs != mbs) {
1602         Mat newmat;
1603         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1604         ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1605         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1606       } else {
1607         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1608       }
1609       if (pcbddc->issym) {
1610         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1611       }
1612     } else {
1613       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1614 
1615       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1616       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1617       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1618     }
1619     if (!pcbddc->ksp_R) { /* create object if not present */
1620       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1621       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1622       /* default */
1623       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1624       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1625       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1626       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1627       if (issbaij) {
1628         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1629       } else {
1630         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1631       }
1632       /* Allow user's customization */
1633       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1634       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1635     }
1636     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1637     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1638     if (!n_R) {
1639       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1640       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1641     }
1642     /* Reuse MUMPS solver if it is present */
1643     if (sub_schurs->reuse_mumps) {
1644       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1645 
1646       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1647     }
1648     /* Set Up KSP for Neumann problem of BDDC */
1649     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1650   }
1651   /* free Neumann problem's matrix */
1652   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1653 
1654   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1655   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1656     if (pcbddc->dbg_flag) {
1657       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1658       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1659       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1660     }
1661     if (dirichlet) { /* Dirichlet */
1662       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1663       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1664       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1665       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1666       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1667       /* need to be adapted? */
1668       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1669       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1670       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1671       /* print info */
1672       if (pcbddc->dbg_flag) {
1673         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1674         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1675       }
1676       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1677         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1678       }
1679     }
1680     if (neumann) { /* Neumann */
1681       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1682       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1683       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1684       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1685       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1686       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1687       /* need to be adapted? */
1688       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1689       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1690       /* print info */
1691       if (pcbddc->dbg_flag) {
1692         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1693         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1694       }
1695       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1696         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1697       }
1698     }
1699   }
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 #undef __FUNCT__
1704 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1705 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1706 {
1707   PetscErrorCode  ierr;
1708   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1709   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1710 
1711   PetscFunctionBegin;
1712   if (!sub_schurs->reuse_mumps) {
1713     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1714   }
1715   if (!pcbddc->switch_static) {
1716     if (applytranspose && pcbddc->local_auxmat1) {
1717       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1718       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1719     }
1720     if (!sub_schurs->reuse_mumps) {
1721       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1722       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1723     } else {
1724       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1725 
1726       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhsB,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1727       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhsB,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1728     }
1729   } else {
1730     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1731     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1732     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1733     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1734     if (applytranspose && pcbddc->local_auxmat1) {
1735       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1736       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1737       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1738       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1739     }
1740   }
1741   if (!sub_schurs->reuse_mumps) {
1742     if (applytranspose) {
1743       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1744     } else {
1745       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1746     }
1747   } else {
1748     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1749 
1750     if (applytranspose) {
1751       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhsB,reuse_mumps->solB);CHKERRQ(ierr);
1752     } else {
1753       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhsB,reuse_mumps->solB);CHKERRQ(ierr);
1754     }
1755   }
1756   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1757   if (!pcbddc->switch_static) {
1758     if (!sub_schurs->reuse_mumps) {
1759       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1760       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1761     } else {
1762       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1763 
1764       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->solB,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1765       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->solB,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1766     }
1767     if (!applytranspose && pcbddc->local_auxmat1) {
1768       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1769       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1770     }
1771   } else {
1772     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1773     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1774     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1775     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1776     if (!applytranspose && pcbddc->local_auxmat1) {
1777       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1778       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1779     }
1780     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1781     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1782     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1783     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1784   }
1785   PetscFunctionReturn(0);
1786 }
1787 
1788 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1789 #undef __FUNCT__
1790 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1791 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1792 {
1793   PetscErrorCode ierr;
1794   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1795   PC_IS*            pcis = (PC_IS*)  (pc->data);
1796   const PetscScalar zero = 0.0;
1797 
1798   PetscFunctionBegin;
1799   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1800   if (applytranspose) {
1801     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1802     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1803   } else {
1804     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1805     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1806   }
1807   /* start communications from local primal nodes to rhs of coarse solver */
1808   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1809   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1810   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1811 
1812   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1813   /* TODO remove null space when doing multilevel */
1814   if (pcbddc->coarse_ksp) {
1815     Vec rhs,sol;
1816 
1817     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1818     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1819     if (applytranspose) {
1820       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1821     } else {
1822       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1823     }
1824   }
1825 
1826   /* Local solution on R nodes */
1827   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1828     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1829   }
1830 
1831   /* communications from coarse sol to local primal nodes */
1832   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1833   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1834 
1835   /* Sum contributions from two levels */
1836   if (applytranspose) {
1837     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1838     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1839   } else {
1840     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1841     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1842   }
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 #undef __FUNCT__
1847 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1848 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1849 {
1850   PetscErrorCode ierr;
1851   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1852   PetscScalar    *array;
1853   Vec            from,to;
1854 
1855   PetscFunctionBegin;
1856   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1857     from = pcbddc->coarse_vec;
1858     to = pcbddc->vec1_P;
1859     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1860       Vec tvec;
1861 
1862       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1863       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1864       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1865       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1866       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1867       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1868     }
1869   } else { /* from local to global -> put data in coarse right hand side */
1870     from = pcbddc->vec1_P;
1871     to = pcbddc->coarse_vec;
1872   }
1873   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1879 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1880 {
1881   PetscErrorCode ierr;
1882   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1883   PetscScalar    *array;
1884   Vec            from,to;
1885 
1886   PetscFunctionBegin;
1887   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1888     from = pcbddc->coarse_vec;
1889     to = pcbddc->vec1_P;
1890   } else { /* from local to global -> put data in coarse right hand side */
1891     from = pcbddc->vec1_P;
1892     to = pcbddc->coarse_vec;
1893   }
1894   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1895   if (smode == SCATTER_FORWARD) {
1896     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1897       Vec tvec;
1898 
1899       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1900       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1901       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1902       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1903     }
1904   } else {
1905     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1906      ierr = VecResetArray(from);CHKERRQ(ierr);
1907     }
1908   }
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 /* uncomment for testing purposes */
1913 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1914 #undef __FUNCT__
1915 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1916 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1917 {
1918   PetscErrorCode    ierr;
1919   PC_IS*            pcis = (PC_IS*)(pc->data);
1920   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1921   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1922   /* one and zero */
1923   PetscScalar       one=1.0,zero=0.0;
1924   /* space to store constraints and their local indices */
1925   PetscScalar       *constraints_data;
1926   PetscInt          *constraints_idxs,*constraints_idxs_B;
1927   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1928   PetscInt          *constraints_n;
1929   /* iterators */
1930   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1931   /* BLAS integers */
1932   PetscBLASInt      lwork,lierr;
1933   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1934   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1935   /* reuse */
1936   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1937   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1938   /* change of basis */
1939   PetscBool         qr_needed;
1940   PetscBT           change_basis,qr_needed_idx;
1941   /* auxiliary stuff */
1942   PetscInt          *nnz,*is_indices;
1943   PetscInt          ncc;
1944   /* some quantities */
1945   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1946   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1947 
1948   PetscFunctionBegin;
1949   /* Destroy Mat objects computed previously */
1950   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1951   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1952   /* save info on constraints from previous setup (if any) */
1953   olocal_primal_size = pcbddc->local_primal_size;
1954   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1955   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
1956   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1957   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1958   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1959   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1960 
1961   /* print some info */
1962   if (pcbddc->dbg_flag) {
1963     IS       vertices;
1964     PetscInt nv,nedges,nfaces;
1965     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1966     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1967     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1968     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1969     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1970     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1971     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1972     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1973     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1974   }
1975 
1976   if (!pcbddc->adaptive_selection) {
1977     IS           ISForVertices,*ISForFaces,*ISForEdges;
1978     MatNullSpace nearnullsp;
1979     const Vec    *nearnullvecs;
1980     Vec          *localnearnullsp;
1981     PetscScalar  *array;
1982     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1983     PetscBool    nnsp_has_cnst;
1984     /* LAPACK working arrays for SVD or POD */
1985     PetscBool    skip_lapack,boolforchange;
1986     PetscScalar  *work;
1987     PetscReal    *singular_vals;
1988 #if defined(PETSC_USE_COMPLEX)
1989     PetscReal    *rwork;
1990 #endif
1991 #if defined(PETSC_MISSING_LAPACK_GESVD)
1992     PetscScalar  *temp_basis,*correlation_mat;
1993 #else
1994     PetscBLASInt dummy_int=1;
1995     PetscScalar  dummy_scalar=1.;
1996 #endif
1997 
1998     /* Get index sets for faces, edges and vertices from graph */
1999     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2000     /* free unneeded index sets */
2001     if (!pcbddc->use_vertices) {
2002       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2003     }
2004     if (!pcbddc->use_edges) {
2005       for (i=0;i<n_ISForEdges;i++) {
2006         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2007       }
2008       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2009       n_ISForEdges = 0;
2010     }
2011     if (!pcbddc->use_faces) {
2012       for (i=0;i<n_ISForFaces;i++) {
2013         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2014       }
2015       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2016       n_ISForFaces = 0;
2017     }
2018 
2019 #if defined(PETSC_USE_DEBUG)
2020     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2021        Also use_change_of_basis should be consistent among processors */
2022     if (pcbddc->NullSpace) {
2023       PetscBool tbool[2],gbool[2];
2024 
2025       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2026         pcbddc->use_change_of_basis = PETSC_TRUE;
2027         if (!ISForEdges) {
2028           pcbddc->use_change_on_faces = PETSC_TRUE;
2029         }
2030       }
2031       tbool[0] = pcbddc->use_change_of_basis;
2032       tbool[1] = pcbddc->use_change_on_faces;
2033       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2034       pcbddc->use_change_of_basis = gbool[0];
2035       pcbddc->use_change_on_faces = gbool[1];
2036     }
2037 #endif
2038 
2039     /* check if near null space is attached to global mat */
2040     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2041     if (nearnullsp) {
2042       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2043       /* remove any stored info */
2044       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2045       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2046       /* store information for BDDC solver reuse */
2047       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2048       pcbddc->onearnullspace = nearnullsp;
2049       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2050       for (i=0;i<nnsp_size;i++) {
2051         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2052       }
2053     } else { /* if near null space is not provided BDDC uses constants by default */
2054       nnsp_size = 0;
2055       nnsp_has_cnst = PETSC_TRUE;
2056     }
2057     /* get max number of constraints on a single cc */
2058     max_constraints = nnsp_size;
2059     if (nnsp_has_cnst) max_constraints++;
2060 
2061     /*
2062          Evaluate maximum storage size needed by the procedure
2063          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2064          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2065          There can be multiple constraints per connected component
2066                                                                                                                                                            */
2067     n_vertices = 0;
2068     if (ISForVertices) {
2069       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2070     }
2071     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2072     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2073 
2074     total_counts = n_ISForFaces+n_ISForEdges;
2075     total_counts *= max_constraints;
2076     total_counts += n_vertices;
2077     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2078 
2079     total_counts = 0;
2080     max_size_of_constraint = 0;
2081     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2082       IS used_is;
2083       if (i<n_ISForEdges) {
2084         used_is = ISForEdges[i];
2085       } else {
2086         used_is = ISForFaces[i-n_ISForEdges];
2087       }
2088       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2089       total_counts += j;
2090       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2091     }
2092     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2093 
2094     /* get local part of global near null space vectors */
2095     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2096     for (k=0;k<nnsp_size;k++) {
2097       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2098       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2099       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2100     }
2101 
2102     /* whether or not to skip lapack calls */
2103     skip_lapack = PETSC_TRUE;
2104     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2105 
2106     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2107     if (!skip_lapack) {
2108       PetscScalar temp_work;
2109 
2110 #if defined(PETSC_MISSING_LAPACK_GESVD)
2111       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2112       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2113       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2114       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2115 #if defined(PETSC_USE_COMPLEX)
2116       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2117 #endif
2118       /* now we evaluate the optimal workspace using query with lwork=-1 */
2119       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2120       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2121       lwork = -1;
2122       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2123 #if !defined(PETSC_USE_COMPLEX)
2124       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2125 #else
2126       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2127 #endif
2128       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2129       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2130 #else /* on missing GESVD */
2131       /* SVD */
2132       PetscInt max_n,min_n;
2133       max_n = max_size_of_constraint;
2134       min_n = max_constraints;
2135       if (max_size_of_constraint < max_constraints) {
2136         min_n = max_size_of_constraint;
2137         max_n = max_constraints;
2138       }
2139       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2140 #if defined(PETSC_USE_COMPLEX)
2141       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2142 #endif
2143       /* now we evaluate the optimal workspace using query with lwork=-1 */
2144       lwork = -1;
2145       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2146       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2147       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2148       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2149 #if !defined(PETSC_USE_COMPLEX)
2150       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2151 #else
2152       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2153 #endif
2154       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2155       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2156 #endif /* on missing GESVD */
2157       /* Allocate optimal workspace */
2158       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2159       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2160     }
2161     /* Now we can loop on constraining sets */
2162     total_counts = 0;
2163     constraints_idxs_ptr[0] = 0;
2164     constraints_data_ptr[0] = 0;
2165     /* vertices */
2166     if (n_vertices) {
2167       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2168       if (nnsp_has_cnst) { /* it considers all possible vertices */
2169         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2170         for (i=0;i<n_vertices;i++) {
2171           constraints_n[total_counts] = 1;
2172           constraints_data[total_counts] = 1.0;
2173           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2174           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2175           total_counts++;
2176         }
2177       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2178         PetscBool used_vertex;
2179         for (i=0;i<n_vertices;i++) {
2180           used_vertex = PETSC_FALSE;
2181           k = 0;
2182           while (!used_vertex && k<nnsp_size) {
2183             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2184             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2185               constraints_n[total_counts] = 1;
2186               constraints_idxs[total_counts] = is_indices[i];
2187               constraints_data[total_counts] = 1.0;
2188               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2189               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2190               total_counts++;
2191               used_vertex = PETSC_TRUE;
2192             }
2193             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2194             k++;
2195           }
2196         }
2197       }
2198       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2199       n_vertices = total_counts;
2200     }
2201 
2202     /* edges and faces */
2203     total_counts_cc = total_counts;
2204     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2205       IS        used_is;
2206       PetscBool idxs_copied = PETSC_FALSE;
2207 
2208       if (ncc<n_ISForEdges) {
2209         used_is = ISForEdges[ncc];
2210         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2211       } else {
2212         used_is = ISForFaces[ncc-n_ISForEdges];
2213         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2214       }
2215       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2216 
2217       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2218       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2219       /* change of basis should not be performed on local periodic nodes */
2220       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2221       if (nnsp_has_cnst) {
2222         PetscScalar quad_value;
2223 
2224         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2225         idxs_copied = PETSC_TRUE;
2226 
2227         if (!pcbddc->use_nnsp_true) {
2228           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2229         } else {
2230           quad_value = 1.0;
2231         }
2232         for (j=0;j<size_of_constraint;j++) {
2233           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2234         }
2235         temp_constraints++;
2236         total_counts++;
2237       }
2238       for (k=0;k<nnsp_size;k++) {
2239         PetscReal real_value;
2240         PetscScalar *ptr_to_data;
2241 
2242         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2243         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2244         for (j=0;j<size_of_constraint;j++) {
2245           ptr_to_data[j] = array[is_indices[j]];
2246         }
2247         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2248         /* check if array is null on the connected component */
2249         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2250         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2251         if (real_value > 0.0) { /* keep indices and values */
2252           temp_constraints++;
2253           total_counts++;
2254           if (!idxs_copied) {
2255             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2256             idxs_copied = PETSC_TRUE;
2257           }
2258         }
2259       }
2260       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2261       valid_constraints = temp_constraints;
2262       if (!pcbddc->use_nnsp_true && temp_constraints) {
2263         if (temp_constraints == 1) { /* just normalize the constraint */
2264           PetscScalar norm,*ptr_to_data;
2265 
2266           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2267           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2268           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2269           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2270           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2271         } else { /* perform SVD */
2272           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2273           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2274 
2275 #if defined(PETSC_MISSING_LAPACK_GESVD)
2276           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2277              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2278              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2279                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2280                 from that computed using LAPACKgesvd
2281              -> This is due to a different computation of eigenvectors in LAPACKheev
2282              -> The quality of the POD-computed basis will be the same */
2283           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2284           /* Store upper triangular part of correlation matrix */
2285           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2286           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2287           for (j=0;j<temp_constraints;j++) {
2288             for (k=0;k<j+1;k++) {
2289               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2290             }
2291           }
2292           /* compute eigenvalues and eigenvectors of correlation matrix */
2293           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2294           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2295 #if !defined(PETSC_USE_COMPLEX)
2296           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2297 #else
2298           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2299 #endif
2300           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2301           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2302           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2303           j = 0;
2304           while (j < temp_constraints && singular_vals[j] < tol) j++;
2305           total_counts = total_counts-j;
2306           valid_constraints = temp_constraints-j;
2307           /* scale and copy POD basis into used quadrature memory */
2308           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2309           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2310           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2311           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2312           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2313           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2314           if (j<temp_constraints) {
2315             PetscInt ii;
2316             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2317             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2318             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2319             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2320             for (k=0;k<temp_constraints-j;k++) {
2321               for (ii=0;ii<size_of_constraint;ii++) {
2322                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2323               }
2324             }
2325           }
2326 #else  /* on missing GESVD */
2327           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2328           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2329           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2330           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2331 #if !defined(PETSC_USE_COMPLEX)
2332           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2333 #else
2334           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2335 #endif
2336           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2337           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2338           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2339           k = temp_constraints;
2340           if (k > size_of_constraint) k = size_of_constraint;
2341           j = 0;
2342           while (j < k && singular_vals[k-j-1] < tol) j++;
2343           valid_constraints = k-j;
2344           total_counts = total_counts-temp_constraints+valid_constraints;
2345 #endif /* on missing GESVD */
2346         }
2347       }
2348       /* update pointers information */
2349       if (valid_constraints) {
2350         constraints_n[total_counts_cc] = valid_constraints;
2351         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2352         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2353         /* set change_of_basis flag */
2354         if (boolforchange) {
2355           PetscBTSet(change_basis,total_counts_cc);
2356         }
2357         total_counts_cc++;
2358       }
2359     }
2360     /* free workspace */
2361     if (!skip_lapack) {
2362       ierr = PetscFree(work);CHKERRQ(ierr);
2363 #if defined(PETSC_USE_COMPLEX)
2364       ierr = PetscFree(rwork);CHKERRQ(ierr);
2365 #endif
2366       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2367 #if defined(PETSC_MISSING_LAPACK_GESVD)
2368       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2369       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2370 #endif
2371     }
2372     for (k=0;k<nnsp_size;k++) {
2373       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2374     }
2375     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2376     /* free index sets of faces, edges and vertices */
2377     for (i=0;i<n_ISForFaces;i++) {
2378       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2379     }
2380     if (n_ISForFaces) {
2381       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2382     }
2383     for (i=0;i<n_ISForEdges;i++) {
2384       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2385     }
2386     if (n_ISForEdges) {
2387       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2388     }
2389     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2390   } else {
2391     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2392 
2393     total_counts = 0;
2394     n_vertices = 0;
2395     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2396       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2397     }
2398     max_constraints = 0;
2399     total_counts_cc = 0;
2400     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2401       total_counts += pcbddc->adaptive_constraints_n[i];
2402       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2403       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2404     }
2405     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2406     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2407     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2408     constraints_data = pcbddc->adaptive_constraints_data;
2409     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2410     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2411     total_counts_cc = 0;
2412     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2413       if (pcbddc->adaptive_constraints_n[i]) {
2414         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2415       }
2416     }
2417 #if 0
2418     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2419     for (i=0;i<total_counts_cc;i++) {
2420       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2421       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2422       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2423         printf(" %d",constraints_idxs[j]);
2424       }
2425       printf("\n");
2426       printf("number of cc: %d\n",constraints_n[i]);
2427     }
2428     for (i=0;i<n_vertices;i++) {
2429       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2430     }
2431     for (i=0;i<sub_schurs->n_subs;i++) {
2432       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2433     }
2434 #endif
2435 
2436     max_size_of_constraint = 0;
2437     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2438     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2439     /* Change of basis */
2440     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2441     if (pcbddc->use_change_of_basis) {
2442       for (i=0;i<sub_schurs->n_subs;i++) {
2443         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2444           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2445         }
2446       }
2447     }
2448   }
2449   pcbddc->local_primal_size = total_counts;
2450   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2451 
2452   /* map constraints_idxs in boundary numbering */
2453   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2454   if (i != constraints_idxs_ptr[total_counts_cc]) {
2455     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2456   }
2457 
2458   /* Create constraint matrix */
2459   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2460   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2461   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2462 
2463   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2464   /* determine if a QR strategy is needed for change of basis */
2465   qr_needed = PETSC_FALSE;
2466   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2467   total_primal_vertices=0;
2468   pcbddc->local_primal_size_cc = 0;
2469   for (i=0;i<total_counts_cc;i++) {
2470     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2471     if (size_of_constraint == 1) {
2472       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2473       pcbddc->local_primal_size_cc += 1;
2474     } else if (PetscBTLookup(change_basis,i)) {
2475       for (k=0;k<constraints_n[i];k++) {
2476         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2477       }
2478       pcbddc->local_primal_size_cc += constraints_n[i];
2479       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2480         PetscBTSet(qr_needed_idx,i);
2481         qr_needed = PETSC_TRUE;
2482       }
2483     } else {
2484       pcbddc->local_primal_size_cc += 1;
2485     }
2486   }
2487   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2488   pcbddc->n_vertices = total_primal_vertices;
2489   /* permute indices in order to have a sorted set of vertices */
2490   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2491 
2492   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2493   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2494   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2495 
2496   /* nonzero structure of constraint matrix */
2497   /* and get reference dof for local constraints */
2498   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2499   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2500 
2501   j = total_primal_vertices;
2502   total_counts = total_primal_vertices;
2503   cum = total_primal_vertices;
2504   for (i=n_vertices;i<total_counts_cc;i++) {
2505     if (!PetscBTLookup(change_basis,i)) {
2506       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2507       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2508       cum++;
2509       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2510       for (k=0;k<constraints_n[i];k++) {
2511         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2512         nnz[j+k] = size_of_constraint;
2513       }
2514       j += constraints_n[i];
2515     }
2516   }
2517   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2518   ierr = PetscFree(nnz);CHKERRQ(ierr);
2519 
2520   /* set values in constraint matrix */
2521   for (i=0;i<total_primal_vertices;i++) {
2522     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2523   }
2524   total_counts = total_primal_vertices;
2525   for (i=n_vertices;i<total_counts_cc;i++) {
2526     if (!PetscBTLookup(change_basis,i)) {
2527       PetscInt *cols;
2528 
2529       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2530       cols = constraints_idxs+constraints_idxs_ptr[i];
2531       for (k=0;k<constraints_n[i];k++) {
2532         PetscInt    row = total_counts+k;
2533         PetscScalar *vals;
2534 
2535         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2536         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2537       }
2538       total_counts += constraints_n[i];
2539     }
2540   }
2541   /* assembling */
2542   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2543   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2544 
2545   /*
2546   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2547   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2548   */
2549   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2550   if (pcbddc->use_change_of_basis) {
2551     /* dual and primal dofs on a single cc */
2552     PetscInt     dual_dofs,primal_dofs;
2553     /* working stuff for GEQRF */
2554     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2555     PetscBLASInt lqr_work;
2556     /* working stuff for UNGQR */
2557     PetscScalar  *gqr_work,lgqr_work_t;
2558     PetscBLASInt lgqr_work;
2559     /* working stuff for TRTRS */
2560     PetscScalar  *trs_rhs;
2561     PetscBLASInt Blas_NRHS;
2562     /* pointers for values insertion into change of basis matrix */
2563     PetscInt     *start_rows,*start_cols;
2564     PetscScalar  *start_vals;
2565     /* working stuff for values insertion */
2566     PetscBT      is_primal;
2567     PetscInt     *aux_primal_numbering_B;
2568     /* matrix sizes */
2569     PetscInt     global_size,local_size;
2570     /* temporary change of basis */
2571     Mat          localChangeOfBasisMatrix;
2572     /* extra space for debugging */
2573     PetscScalar  *dbg_work;
2574 
2575     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2576     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2577     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2578     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2579     /* nonzeros for local mat */
2580     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2581     for (i=0;i<pcis->n;i++) nnz[i]=1;
2582     for (i=n_vertices;i<total_counts_cc;i++) {
2583       if (PetscBTLookup(change_basis,i)) {
2584         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2585         if (PetscBTLookup(qr_needed_idx,i)) {
2586           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2587         } else {
2588           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2589           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2590         }
2591       }
2592     }
2593     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2594     ierr = PetscFree(nnz);CHKERRQ(ierr);
2595     /* Set initial identity in the matrix */
2596     for (i=0;i<pcis->n;i++) {
2597       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2598     }
2599 
2600     if (pcbddc->dbg_flag) {
2601       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2602       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2603     }
2604 
2605 
2606     /* Now we loop on the constraints which need a change of basis */
2607     /*
2608        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2609        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2610 
2611        Basic blocks of change of basis matrix T computed by
2612 
2613           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2614 
2615             | 1        0   ...        0         s_1/S |
2616             | 0        1   ...        0         s_2/S |
2617             |              ...                        |
2618             | 0        ...            1     s_{n-1}/S |
2619             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2620 
2621             with S = \sum_{i=1}^n s_i^2
2622             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2623                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2624 
2625           - QR decomposition of constraints otherwise
2626     */
2627     if (qr_needed) {
2628       /* space to store Q */
2629       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2630       /* first we issue queries for optimal work */
2631       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2632       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2633       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2634       lqr_work = -1;
2635       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2636       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2637       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2638       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2639       lgqr_work = -1;
2640       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2641       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2642       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2643       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2644       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2645       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2646       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2647       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2648       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2649       /* array to store scaling factors for reflectors */
2650       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2651       /* array to store rhs and solution of triangular solver */
2652       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2653       /* allocating workspace for check */
2654       if (pcbddc->dbg_flag) {
2655         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2656       }
2657     }
2658     /* array to store whether a node is primal or not */
2659     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2660     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2661     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2662     if (i != total_primal_vertices) {
2663       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2664     }
2665     for (i=0;i<total_primal_vertices;i++) {
2666       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2667     }
2668     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2669 
2670     /* loop on constraints and see whether or not they need a change of basis and compute it */
2671     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2672       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2673       if (PetscBTLookup(change_basis,total_counts)) {
2674         /* get constraint info */
2675         primal_dofs = constraints_n[total_counts];
2676         dual_dofs = size_of_constraint-primal_dofs;
2677 
2678         if (pcbddc->dbg_flag) {
2679           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2680         }
2681 
2682         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2683 
2684           /* copy quadrature constraints for change of basis check */
2685           if (pcbddc->dbg_flag) {
2686             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2687           }
2688           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2689           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2690 
2691           /* compute QR decomposition of constraints */
2692           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2693           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2694           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2695           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2696           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2697           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2698           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2699 
2700           /* explictly compute R^-T */
2701           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2702           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2703           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2704           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2705           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2706           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2707           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2708           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2709           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2710           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2711 
2712           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2713           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2714           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2715           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2716           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2717           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2718           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2719           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2720           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2721 
2722           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2723              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2724              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2725           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2726           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2727           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2728           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2729           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2730           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2731           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2732           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2733           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2734           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2735 
2736           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2737           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2738           /* insert cols for primal dofs */
2739           for (j=0;j<primal_dofs;j++) {
2740             start_vals = &qr_basis[j*size_of_constraint];
2741             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2742             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2743           }
2744           /* insert cols for dual dofs */
2745           for (j=0,k=0;j<dual_dofs;k++) {
2746             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2747               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2748               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2749               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2750               j++;
2751             }
2752           }
2753 
2754           /* check change of basis */
2755           if (pcbddc->dbg_flag) {
2756             PetscInt   ii,jj;
2757             PetscBool valid_qr=PETSC_TRUE;
2758             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2759             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2760             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2761             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2762             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2763             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2764             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2765             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2766             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2767             for (jj=0;jj<size_of_constraint;jj++) {
2768               for (ii=0;ii<primal_dofs;ii++) {
2769                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2770                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2771               }
2772             }
2773             if (!valid_qr) {
2774               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2775               for (jj=0;jj<size_of_constraint;jj++) {
2776                 for (ii=0;ii<primal_dofs;ii++) {
2777                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2778                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2779                   }
2780                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2781                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2782                   }
2783                 }
2784               }
2785             } else {
2786               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2787             }
2788           }
2789         } else { /* simple transformation block */
2790           PetscInt    row,col;
2791           PetscScalar val,norm;
2792 
2793           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2794           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2795           for (j=0;j<size_of_constraint;j++) {
2796             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2797             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2798             if (!PetscBTLookup(is_primal,row_B)) {
2799               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2800               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2801               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2802             } else {
2803               for (k=0;k<size_of_constraint;k++) {
2804                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2805                 if (row != col) {
2806                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2807                 } else {
2808                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2809                 }
2810                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2811               }
2812             }
2813           }
2814           if (pcbddc->dbg_flag) {
2815             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2816           }
2817         }
2818       } else {
2819         if (pcbddc->dbg_flag) {
2820           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2821         }
2822       }
2823     }
2824 
2825     /* free workspace */
2826     if (qr_needed) {
2827       if (pcbddc->dbg_flag) {
2828         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2829       }
2830       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2831       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2832       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2833       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2834       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2835     }
2836     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2837     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2838     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2839 
2840     /* assembling of global change of variable */
2841     {
2842       Mat      tmat;
2843       PetscInt bs;
2844 
2845       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2846       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2847       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2848       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2849       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2850       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2851       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2852       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2853       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2854       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2855       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2856       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2857       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2858       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2859       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2860       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2861       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2862       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2863     }
2864     /* check */
2865     if (pcbddc->dbg_flag) {
2866       PetscReal error;
2867       Vec       x,x_change;
2868 
2869       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2870       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2871       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2872       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2873       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2874       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2875       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2876       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2877       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2878       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2879       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2880       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2881       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2882       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2883       ierr = VecDestroy(&x);CHKERRQ(ierr);
2884       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2885     }
2886 
2887     /* adapt sub_schurs computed (if any) */
2888     if (pcbddc->use_deluxe_scaling) {
2889       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2890       if (sub_schurs->S_Ej_all) {
2891         Mat S_new,tmat;
2892         IS is_all_N;
2893 
2894         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2895         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2896         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2897         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2898         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2899         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2900         sub_schurs->S_Ej_all = S_new;
2901         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2902         if (sub_schurs->sum_S_Ej_all) {
2903           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2904           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2905           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2906           sub_schurs->sum_S_Ej_all = S_new;
2907           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2908         }
2909         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2910       }
2911     }
2912     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2913   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2914     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2915     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2916   }
2917 
2918   /* set up change of basis context */
2919   if (pcbddc->ChangeOfBasisMatrix) {
2920     PCBDDCChange_ctx change_ctx;
2921 
2922     if (!pcbddc->new_global_mat) {
2923       PetscInt global_size,local_size;
2924 
2925       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2926       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2927       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2928       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2929       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2930       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2931       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2932       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2933       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2934     } else {
2935       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2936       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2937       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2938     }
2939     if (!pcbddc->user_ChangeOfBasisMatrix) {
2940       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2941       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2942     } else {
2943       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2944       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2945     }
2946     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2947     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2948     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2949     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2950   }
2951 
2952   /* check if a new primal space has been introduced */
2953   pcbddc->new_primal_space_local = PETSC_TRUE;
2954   if (olocal_primal_size == pcbddc->local_primal_size) {
2955     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2956     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2957     if (!pcbddc->new_primal_space_local) {
2958       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2959       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2960     }
2961   }
2962   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
2963   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2964   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2965 
2966   /* flush dbg viewer */
2967   if (pcbddc->dbg_flag) {
2968     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2969   }
2970 
2971   /* free workspace */
2972   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2973   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2974   if (!pcbddc->adaptive_selection) {
2975     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2976     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2977   } else {
2978     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2979                       pcbddc->adaptive_constraints_idxs_ptr,
2980                       pcbddc->adaptive_constraints_data_ptr,
2981                       pcbddc->adaptive_constraints_idxs,
2982                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2983     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
2984     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
2985   }
2986   PetscFunctionReturn(0);
2987 }
2988 
2989 #undef __FUNCT__
2990 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2991 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2992 {
2993   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2994   PC_IS       *pcis = (PC_IS*)pc->data;
2995   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2996   PetscInt    ierr,i,vertex_size,N;
2997   PetscViewer viewer=pcbddc->dbg_viewer;
2998 
2999   PetscFunctionBegin;
3000   /* Reset previously computed graph */
3001   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3002   /* Init local Graph struct */
3003   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3004   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
3005 
3006   /* Check validity of the csr graph passed in by the user */
3007   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3008     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3009   }
3010 
3011   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3012   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3013     PetscInt  *xadj,*adjncy;
3014     PetscInt  nvtxs;
3015     PetscBool flg_row=PETSC_FALSE;
3016 
3017     if (pcbddc->use_local_adj) {
3018 
3019       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3020       if (flg_row) {
3021         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3022         pcbddc->computed_rowadj = PETSC_TRUE;
3023       }
3024       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3025     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3026       IS                     is_dummy;
3027       ISLocalToGlobalMapping l2gmap_dummy;
3028       PetscInt               j,sum;
3029       PetscInt               *cxadj,*cadjncy;
3030       const PetscInt         *idxs;
3031       PCBDDCGraph            graph;
3032       PetscBT                is_on_boundary;
3033 
3034       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3035       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3036       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3037       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3038       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3039       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3040       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3041       if (flg_row) {
3042         graph->xadj = xadj;
3043         graph->adjncy = adjncy;
3044       }
3045       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3046       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3047       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3048 
3049       if (pcbddc->dbg_flag) {
3050         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3051         for (i=0;i<graph->ncc;i++) {
3052           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3053         }
3054       }
3055 
3056       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3057       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3058       for (i=0;i<pcis->n_B;i++) {
3059         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3060       }
3061       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3062 
3063       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3064       sum = 0;
3065       for (i=0;i<graph->ncc;i++) {
3066         PetscInt sizecc = 0;
3067         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3068           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3069             sizecc++;
3070           }
3071         }
3072         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3073           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3074             cxadj[graph->queue[j]] = sizecc;
3075           }
3076         }
3077         sum += sizecc*sizecc;
3078       }
3079       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3080       sum = 0;
3081       for (i=0;i<pcis->n;i++) {
3082         PetscInt temp = cxadj[i];
3083         cxadj[i] = sum;
3084         sum += temp;
3085       }
3086       cxadj[pcis->n] = sum;
3087       for (i=0;i<graph->ncc;i++) {
3088         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3089           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3090             PetscInt k,sizecc = 0;
3091             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3092               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3093                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3094                 sizecc++;
3095               }
3096             }
3097           }
3098         }
3099       }
3100       if (sum) {
3101         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3102       } else {
3103         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3104         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3105       }
3106       graph->xadj = 0;
3107       graph->adjncy = 0;
3108       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3109       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3110     }
3111   }
3112   if (pcbddc->dbg_flag) {
3113     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3114   }
3115 
3116   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3117   vertex_size = 1;
3118   if (pcbddc->user_provided_isfordofs) {
3119     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3120       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3121       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3122         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3123         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3124       }
3125       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3126       pcbddc->n_ISForDofs = 0;
3127       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3128     }
3129     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3130     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3131   } else {
3132     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3133       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3134       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3135       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3136         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3137       }
3138     }
3139   }
3140 
3141   /* Setup of Graph */
3142   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3143     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3144   }
3145   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3146     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3147   }
3148   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3149 
3150   /* Graph's connected components analysis */
3151   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3152 
3153   /* print some info to stdout */
3154   if (pcbddc->dbg_flag) {
3155     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3156   }
3157 
3158   /* mark topography has done */
3159   pcbddc->recompute_topography = PETSC_FALSE;
3160   PetscFunctionReturn(0);
3161 }
3162 
3163 #undef __FUNCT__
3164 #define __FUNCT__ "PCBDDCSubsetNumbering"
3165 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3166 {
3167   Vec            local_vec,global_vec;
3168   IS             seqis,paris;
3169   VecScatter     scatter_ctx;
3170   PetscScalar    *array;
3171   PetscInt       *temp_global_dofs;
3172   PetscScalar    globalsum;
3173   PetscInt       i,j,s;
3174   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3175   PetscMPIInt    rank_prec_comm,size_prec_comm;
3176   PetscInt       *dof_sizes,*dof_displs;
3177   PetscBool      first_found;
3178   PetscErrorCode ierr;
3179 
3180   PetscFunctionBegin;
3181   /* mpi buffers */
3182   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3183   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3184   j = ( !rank_prec_comm ? size_prec_comm : 0);
3185   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3186   /* get maximum size of subset */
3187   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3188   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3189   max_local = 0;
3190   for (i=0;i<n_local_dofs;i++) {
3191     if (max_local < temp_global_dofs[i] ) {
3192       max_local = temp_global_dofs[i];
3193     }
3194   }
3195   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3196   max_global++;
3197   max_local = 0;
3198   for (i=0;i<n_local_dofs;i++) {
3199     if (max_local < local_dofs[i] ) {
3200       max_local = local_dofs[i];
3201     }
3202   }
3203   max_local++;
3204   /* allocate workspace */
3205   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3206   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3207   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3208   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3209   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3210   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3211   /* create scatter */
3212   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3213   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3214   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3215   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3216   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3217   /* init array */
3218   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3219   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3220   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3221   if (local_dofs_mult) {
3222     for (i=0;i<n_local_dofs;i++) {
3223       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3224     }
3225   } else {
3226     for (i=0;i<n_local_dofs;i++) {
3227       array[local_dofs[i]]=1.0;
3228     }
3229   }
3230   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3231   /* scatter into global vec and get total number of global dofs */
3232   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3233   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3234   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3235   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3236   /* Fill global_vec with cumulative function for global numbering */
3237   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3238   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3239   nlocals = 0;
3240   first_index = -1;
3241   first_found = PETSC_FALSE;
3242   for (i=0;i<s;i++) {
3243     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3244       first_found = PETSC_TRUE;
3245       first_index = i;
3246     }
3247     nlocals += (PetscInt)PetscRealPart(array[i]);
3248   }
3249   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3250   if (!rank_prec_comm) {
3251     dof_displs[0]=0;
3252     for (i=1;i<size_prec_comm;i++) {
3253       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3254     }
3255   }
3256   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3257   if (first_found) {
3258     array[first_index] += (PetscScalar)nlocals;
3259     old_index = first_index;
3260     for (i=first_index+1;i<s;i++) {
3261       if (PetscRealPart(array[i]) > 0.1) {
3262         array[i] += array[old_index];
3263         old_index = i;
3264       }
3265     }
3266   }
3267   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3268   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3269   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3270   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3271   /* get global ordering of local dofs */
3272   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3273   if (local_dofs_mult) {
3274     for (i=0;i<n_local_dofs;i++) {
3275       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3276     }
3277   } else {
3278     for (i=0;i<n_local_dofs;i++) {
3279       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3280     }
3281   }
3282   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3283   /* free workspace */
3284   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3285   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3286   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3287   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3288   /* return pointer to global ordering of local dofs */
3289   *global_numbering_subset = temp_global_dofs;
3290   PetscFunctionReturn(0);
3291 }
3292 
3293 #undef __FUNCT__
3294 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3295 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3296 {
3297   PetscInt       i,j;
3298   PetscScalar    *alphas;
3299   PetscErrorCode ierr;
3300 
3301   PetscFunctionBegin;
3302   /* this implements stabilized Gram-Schmidt */
3303   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3304   for (i=0;i<n;i++) {
3305     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3306     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3307     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3308   }
3309   ierr = PetscFree(alphas);CHKERRQ(ierr);
3310   PetscFunctionReturn(0);
3311 }
3312 
3313 #undef __FUNCT__
3314 #define __FUNCT__ "MatISGetSubassemblingPattern"
3315 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3316 {
3317   Mat             subdomain_adj;
3318   IS              new_ranks,ranks_send_to;
3319   MatPartitioning partitioner;
3320   Mat_IS          *matis;
3321   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3322   PetscInt        prank;
3323   PetscMPIInt     size,rank,color;
3324   PetscInt        *xadj,*adjncy,*oldranks;
3325   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3326   PetscInt        i,local_size,threshold=0;
3327   PetscErrorCode  ierr;
3328   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3329   PetscSubcomm    subcomm;
3330 
3331   PetscFunctionBegin;
3332   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3333   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3334   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3335 
3336   /* Get info on mapping */
3337   matis = (Mat_IS*)(mat->data);
3338   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3339   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3340 
3341   /* build local CSR graph of subdomains' connectivity */
3342   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3343   xadj[0] = 0;
3344   xadj[1] = PetscMax(n_neighs-1,0);
3345   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3346   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3347 
3348   if (threshold) {
3349     PetscInt xadj_count = 0;
3350     for (i=1;i<n_neighs;i++) {
3351       if (n_shared[i] > threshold) {
3352         adjncy[xadj_count] = neighs[i];
3353         adjncy_wgt[xadj_count] = n_shared[i];
3354         xadj_count++;
3355       }
3356     }
3357     xadj[1] = xadj_count;
3358   } else {
3359     if (xadj[1]) {
3360       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3361       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3362     }
3363   }
3364   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3365   if (use_square) {
3366     for (i=0;i<xadj[1];i++) {
3367       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3368     }
3369   }
3370   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3371 
3372   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3373 
3374   /*
3375     Restrict work on active processes only.
3376   */
3377   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3378   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3379   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3380   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3381   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3382   if (color) {
3383     ierr = PetscFree(xadj);CHKERRQ(ierr);
3384     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3385     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3386   } else {
3387     PetscInt coarsening_ratio;
3388     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3389     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3390     prank = rank;
3391     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3392     /*
3393     for (i=0;i<size;i++) {
3394       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3395     }
3396     */
3397     for (i=0;i<xadj[1];i++) {
3398       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3399     }
3400     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3401     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3402     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3403 
3404     /* Partition */
3405     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3406     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3407     if (use_vwgt) {
3408       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3409       v_wgt[0] = local_size;
3410       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3411     }
3412     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3413     coarsening_ratio = size/n_subdomains;
3414     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3415     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3416     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3417     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3418 
3419     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3420     if (contiguous) {
3421       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3422     } else {
3423       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3424     }
3425     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3426     /* clean up */
3427     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3428     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3429     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3430     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3431   }
3432   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3433 
3434   /* assemble parallel IS for sends */
3435   i = 1;
3436   if (color) i=0;
3437   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3438 
3439   /* get back IS */
3440   *is_sends = ranks_send_to;
3441   PetscFunctionReturn(0);
3442 }
3443 
3444 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3445 
3446 #undef __FUNCT__
3447 #define __FUNCT__ "MatISSubassemble"
3448 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3449 {
3450   Mat                    local_mat;
3451   Mat_IS                 *matis;
3452   IS                     is_sends_internal;
3453   PetscInt               rows,cols,new_local_rows;
3454   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3455   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3456   ISLocalToGlobalMapping l2gmap;
3457   PetscInt*              l2gmap_indices;
3458   const PetscInt*        is_indices;
3459   MatType                new_local_type;
3460   /* buffers */
3461   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3462   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3463   PetscInt               *recv_buffer_idxs_local;
3464   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3465   /* MPI */
3466   MPI_Comm               comm,comm_n;
3467   PetscSubcomm           subcomm;
3468   PetscMPIInt            n_sends,n_recvs,commsize;
3469   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3470   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3471   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3472   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3473   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3474   PetscErrorCode         ierr;
3475 
3476   PetscFunctionBegin;
3477   /* TODO: add missing checks */
3478   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3479   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3480   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3481   PetscValidLogicalCollectiveInt(mat,nis,7);
3482   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3483   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3484   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3485   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3486   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3487   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3488   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3489   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3490     PetscInt mrows,mcols,mnrows,mncols;
3491     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3492     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3493     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3494     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3495     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3496     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3497   }
3498   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3499   PetscValidLogicalCollectiveInt(mat,bs,0);
3500   /* prepare IS for sending if not provided */
3501   if (!is_sends) {
3502     PetscBool pcontig = PETSC_TRUE;
3503     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3504     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3505   } else {
3506     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3507     is_sends_internal = is_sends;
3508   }
3509 
3510   /* get pointer of MATIS data */
3511   matis = (Mat_IS*)mat->data;
3512 
3513   /* get comm */
3514   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3515 
3516   /* compute number of sends */
3517   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3518   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3519 
3520   /* compute number of receives */
3521   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3522   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3523   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3524   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3525   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3526   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3527   ierr = PetscFree(iflags);CHKERRQ(ierr);
3528 
3529   /* restrict comm if requested */
3530   subcomm = 0;
3531   destroy_mat = PETSC_FALSE;
3532   if (restrict_comm) {
3533     PetscMPIInt color,subcommsize;
3534 
3535     color = 0;
3536     if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */
3537     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3538     subcommsize = commsize - subcommsize;
3539     /* check if reuse has been requested */
3540     if (reuse == MAT_REUSE_MATRIX) {
3541       if (*mat_n) {
3542         PetscMPIInt subcommsize2;
3543         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3544         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3545         comm_n = PetscObjectComm((PetscObject)*mat_n);
3546       } else {
3547         comm_n = PETSC_COMM_SELF;
3548       }
3549     } else { /* MAT_INITIAL_MATRIX */
3550       PetscMPIInt rank;
3551 
3552       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3553       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3554       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3555       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3556       comm_n = PetscSubcommChild(subcomm);
3557     }
3558     /* flag to destroy *mat_n if not significative */
3559     if (color) destroy_mat = PETSC_TRUE;
3560   } else {
3561     comm_n = comm;
3562   }
3563 
3564   /* prepare send/receive buffers */
3565   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3566   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3567   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3568   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3569   if (nis) {
3570     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3571   }
3572 
3573   /* Get data from local matrices */
3574   if (!isdense) {
3575     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3576     /* TODO: See below some guidelines on how to prepare the local buffers */
3577     /*
3578        send_buffer_vals should contain the raw values of the local matrix
3579        send_buffer_idxs should contain:
3580        - MatType_PRIVATE type
3581        - PetscInt        size_of_l2gmap
3582        - PetscInt        global_row_indices[size_of_l2gmap]
3583        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3584     */
3585   } else {
3586     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3587     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3588     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3589     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3590     send_buffer_idxs[1] = i;
3591     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3592     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3593     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3594     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3595     for (i=0;i<n_sends;i++) {
3596       ilengths_vals[is_indices[i]] = len*len;
3597       ilengths_idxs[is_indices[i]] = len+2;
3598     }
3599   }
3600   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3601   /* additional is (if any) */
3602   if (nis) {
3603     PetscMPIInt psum;
3604     PetscInt j;
3605     for (j=0,psum=0;j<nis;j++) {
3606       PetscInt plen;
3607       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3608       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3609       psum += len+1; /* indices + lenght */
3610     }
3611     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3612     for (j=0,psum=0;j<nis;j++) {
3613       PetscInt plen;
3614       const PetscInt *is_array_idxs;
3615       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3616       send_buffer_idxs_is[psum] = plen;
3617       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3618       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3619       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3620       psum += plen+1; /* indices + lenght */
3621     }
3622     for (i=0;i<n_sends;i++) {
3623       ilengths_idxs_is[is_indices[i]] = psum;
3624     }
3625     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3626   }
3627 
3628   buf_size_idxs = 0;
3629   buf_size_vals = 0;
3630   buf_size_idxs_is = 0;
3631   for (i=0;i<n_recvs;i++) {
3632     buf_size_idxs += (PetscInt)olengths_idxs[i];
3633     buf_size_vals += (PetscInt)olengths_vals[i];
3634     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3635   }
3636   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3637   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3638   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3639 
3640   /* get new tags for clean communications */
3641   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3642   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3643   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3644 
3645   /* allocate for requests */
3646   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3647   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3648   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3649   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3650   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3651   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3652 
3653   /* communications */
3654   ptr_idxs = recv_buffer_idxs;
3655   ptr_vals = recv_buffer_vals;
3656   ptr_idxs_is = recv_buffer_idxs_is;
3657   for (i=0;i<n_recvs;i++) {
3658     source_dest = onodes[i];
3659     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3660     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3661     ptr_idxs += olengths_idxs[i];
3662     ptr_vals += olengths_vals[i];
3663     if (nis) {
3664       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3665       ptr_idxs_is += olengths_idxs_is[i];
3666     }
3667   }
3668   for (i=0;i<n_sends;i++) {
3669     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3670     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3671     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3672     if (nis) {
3673       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3674     }
3675   }
3676   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3677   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3678 
3679   /* assemble new l2g map */
3680   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3681   ptr_idxs = recv_buffer_idxs;
3682   new_local_rows = 0;
3683   for (i=0;i<n_recvs;i++) {
3684     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3685     ptr_idxs += olengths_idxs[i];
3686   }
3687   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3688   ptr_idxs = recv_buffer_idxs;
3689   new_local_rows = 0;
3690   for (i=0;i<n_recvs;i++) {
3691     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3692     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3693     ptr_idxs += olengths_idxs[i];
3694   }
3695   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3696   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3697   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3698 
3699   /* infer new local matrix type from received local matrices type */
3700   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3701   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3702   if (n_recvs) {
3703     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3704     ptr_idxs = recv_buffer_idxs;
3705     for (i=0;i<n_recvs;i++) {
3706       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3707         new_local_type_private = MATAIJ_PRIVATE;
3708         break;
3709       }
3710       ptr_idxs += olengths_idxs[i];
3711     }
3712     switch (new_local_type_private) {
3713       case MATDENSE_PRIVATE:
3714         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3715           new_local_type = MATSEQAIJ;
3716           bs = 1;
3717         } else { /* if I receive only 1 dense matrix */
3718           new_local_type = MATSEQDENSE;
3719           bs = 1;
3720         }
3721         break;
3722       case MATAIJ_PRIVATE:
3723         new_local_type = MATSEQAIJ;
3724         bs = 1;
3725         break;
3726       case MATBAIJ_PRIVATE:
3727         new_local_type = MATSEQBAIJ;
3728         break;
3729       case MATSBAIJ_PRIVATE:
3730         new_local_type = MATSEQSBAIJ;
3731         break;
3732       default:
3733         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3734         break;
3735     }
3736   } else { /* by default, new_local_type is seqdense */
3737     new_local_type = MATSEQDENSE;
3738     bs = 1;
3739   }
3740 
3741   /* create MATIS object if needed */
3742   if (reuse == MAT_INITIAL_MATRIX) {
3743     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3744     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3745   } else {
3746     /* it also destroys the local matrices */
3747     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3748   }
3749   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3750   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3751 
3752   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3753 
3754   /* Global to local map of received indices */
3755   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3756   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3757   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3758 
3759   /* restore attributes -> type of incoming data and its size */
3760   buf_size_idxs = 0;
3761   for (i=0;i<n_recvs;i++) {
3762     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3763     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3764     buf_size_idxs += (PetscInt)olengths_idxs[i];
3765   }
3766   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3767 
3768   /* set preallocation */
3769   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3770   if (!newisdense) {
3771     PetscInt *new_local_nnz=0;
3772 
3773     ptr_vals = recv_buffer_vals;
3774     ptr_idxs = recv_buffer_idxs_local;
3775     if (n_recvs) {
3776       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3777     }
3778     for (i=0;i<n_recvs;i++) {
3779       PetscInt j;
3780       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3781         for (j=0;j<*(ptr_idxs+1);j++) {
3782           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3783         }
3784       } else {
3785         /* TODO */
3786       }
3787       ptr_idxs += olengths_idxs[i];
3788     }
3789     if (new_local_nnz) {
3790       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3791       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3792       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3793       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3794       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3795       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3796     } else {
3797       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3798     }
3799     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3800   } else {
3801     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3802   }
3803 
3804   /* set values */
3805   ptr_vals = recv_buffer_vals;
3806   ptr_idxs = recv_buffer_idxs_local;
3807   for (i=0;i<n_recvs;i++) {
3808     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3809       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3810       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3811       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3812       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3813       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3814     } else {
3815       /* TODO */
3816     }
3817     ptr_idxs += olengths_idxs[i];
3818     ptr_vals += olengths_vals[i];
3819   }
3820   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3821   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3822   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3823   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3824   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3825   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3826 
3827 #if 0
3828   if (!restrict_comm) { /* check */
3829     Vec       lvec,rvec;
3830     PetscReal infty_error;
3831 
3832     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3833     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3834     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3835     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3836     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3837     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3838     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3839     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3840     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3841   }
3842 #endif
3843 
3844   /* assemble new additional is (if any) */
3845   if (nis) {
3846     PetscInt **temp_idxs,*count_is,j,psum;
3847 
3848     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3849     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3850     ptr_idxs = recv_buffer_idxs_is;
3851     psum = 0;
3852     for (i=0;i<n_recvs;i++) {
3853       for (j=0;j<nis;j++) {
3854         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3855         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3856         psum += plen;
3857         ptr_idxs += plen+1; /* shift pointer to received data */
3858       }
3859     }
3860     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3861     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3862     for (i=1;i<nis;i++) {
3863       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3864     }
3865     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3866     ptr_idxs = recv_buffer_idxs_is;
3867     for (i=0;i<n_recvs;i++) {
3868       for (j=0;j<nis;j++) {
3869         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3870         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3871         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3872         ptr_idxs += plen+1; /* shift pointer to received data */
3873       }
3874     }
3875     for (i=0;i<nis;i++) {
3876       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3877       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3878       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3879     }
3880     ierr = PetscFree(count_is);CHKERRQ(ierr);
3881     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3882     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3883   }
3884   /* free workspace */
3885   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3886   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3887   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3888   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3889   if (isdense) {
3890     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3891     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3892   } else {
3893     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3894   }
3895   if (nis) {
3896     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3897     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3898   }
3899   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3900   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3901   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3902   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3903   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3904   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3905   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3906   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3907   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3908   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3909   ierr = PetscFree(onodes);CHKERRQ(ierr);
3910   if (nis) {
3911     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3912     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3913     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3914   }
3915   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3916   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3917     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3918     for (i=0;i<nis;i++) {
3919       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3920     }
3921   }
3922   PetscFunctionReturn(0);
3923 }
3924 
3925 /* temporary hack into ksp private data structure */
3926 #include <petsc/private/kspimpl.h>
3927 
3928 #undef __FUNCT__
3929 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3930 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3931 {
3932   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3933   PC_IS                  *pcis = (PC_IS*)pc->data;
3934   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3935   MatNullSpace           CoarseNullSpace=NULL;
3936   ISLocalToGlobalMapping coarse_islg;
3937   IS                     coarse_is,*isarray;
3938   PetscInt               i,im_active=-1,active_procs=-1;
3939   PetscInt               nis,nisdofs,nisneu;
3940   PC                     pc_temp;
3941   PCType                 coarse_pc_type;
3942   KSPType                coarse_ksp_type;
3943   PetscBool              multilevel_requested,multilevel_allowed;
3944   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3945   Mat                    t_coarse_mat_is;
3946   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3947   PetscMPIInt            all_procs;
3948   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3949   PetscBool              compute_vecs = PETSC_FALSE;
3950   PetscScalar            *array;
3951   PetscErrorCode         ierr;
3952 
3953   PetscFunctionBegin;
3954   /* Assign global numbering to coarse dofs */
3955   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3956     PetscInt ocoarse_size;
3957     compute_vecs = PETSC_TRUE;
3958     ocoarse_size = pcbddc->coarse_size;
3959     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3960     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3961     /* see if we can avoid some work */
3962     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3963       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3964         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3965         coarse_reuse = PETSC_FALSE;
3966       } else { /* we can safely reuse already computed coarse matrix */
3967         coarse_reuse = PETSC_TRUE;
3968       }
3969     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3970       coarse_reuse = PETSC_FALSE;
3971     }
3972     /* reset any subassembling information */
3973     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3974     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3975   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3976     coarse_reuse = PETSC_TRUE;
3977   }
3978 
3979   /* count "active" (i.e. with positive local size) and "void" processes */
3980   im_active = !!(pcis->n);
3981   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3982   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3983   void_procs = all_procs-active_procs;
3984   csin_type_simple = PETSC_TRUE;
3985   redist = PETSC_FALSE;
3986   if (pcbddc->current_level && void_procs) {
3987     csin_ml = PETSC_TRUE;
3988     ncoarse_ml = void_procs;
3989     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3990     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3991       csin_ds = PETSC_TRUE;
3992       ncoarse_ds = pcbddc->redistribute_coarse;
3993       redist = PETSC_TRUE;
3994     } else {
3995       csin_ds = PETSC_TRUE;
3996       ncoarse_ds = active_procs;
3997       redist = PETSC_TRUE;
3998     }
3999   } else {
4000     csin_ml = PETSC_FALSE;
4001     ncoarse_ml = all_procs;
4002     if (void_procs) {
4003       csin_ds = PETSC_TRUE;
4004       ncoarse_ds = void_procs;
4005       csin_type_simple = PETSC_FALSE;
4006     } else {
4007       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4008         csin_ds = PETSC_TRUE;
4009         ncoarse_ds = pcbddc->redistribute_coarse;
4010         redist = PETSC_TRUE;
4011       } else {
4012         csin_ds = PETSC_FALSE;
4013         ncoarse_ds = all_procs;
4014       }
4015     }
4016   }
4017 
4018   /*
4019     test if we can go multilevel: three conditions must be satisfied:
4020     - we have not exceeded the number of levels requested
4021     - we can actually subassemble the active processes
4022     - we can find a suitable number of MPI processes where we can place the subassembled problem
4023   */
4024   multilevel_allowed = PETSC_FALSE;
4025   multilevel_requested = PETSC_FALSE;
4026   if (pcbddc->current_level < pcbddc->max_levels) {
4027     multilevel_requested = PETSC_TRUE;
4028     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4029       multilevel_allowed = PETSC_FALSE;
4030     } else {
4031       multilevel_allowed = PETSC_TRUE;
4032     }
4033   }
4034   /* determine number of process partecipating to coarse solver */
4035   if (multilevel_allowed) {
4036     ncoarse = ncoarse_ml;
4037     csin = csin_ml;
4038     redist = PETSC_FALSE;
4039   } else {
4040     ncoarse = ncoarse_ds;
4041     csin = csin_ds;
4042   }
4043 
4044   /* creates temporary l2gmap and IS for coarse indexes */
4045   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4046   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4047 
4048   /* creates temporary MATIS object for coarse matrix */
4049   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4050   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4051   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4052   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4053 #if 0
4054   {
4055     PetscViewer viewer;
4056     char filename[256];
4057     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4058     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4059     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4060     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4061     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4062   }
4063 #endif
4064   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4065   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4066   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4067   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4068   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4069 
4070   /* compute dofs splitting and neumann boundaries for coarse dofs */
4071   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4072     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4073     const PetscInt         *idxs;
4074     ISLocalToGlobalMapping tmap;
4075 
4076     /* create map between primal indices (in local representative ordering) and local primal numbering */
4077     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4078     /* allocate space for temporary storage */
4079     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4080     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4081     /* allocate for IS array */
4082     nisdofs = pcbddc->n_ISForDofsLocal;
4083     nisneu = !!pcbddc->NeumannBoundariesLocal;
4084     nis = nisdofs + nisneu;
4085     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4086     /* dofs splitting */
4087     for (i=0;i<nisdofs;i++) {
4088       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4089       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4090       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4091       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4092       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4093       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4094       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4095       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4096     }
4097     /* neumann boundaries */
4098     if (pcbddc->NeumannBoundariesLocal) {
4099       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4100       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4101       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4102       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4103       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4104       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4105       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4106       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4107     }
4108     /* free memory */
4109     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4110     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4111     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4112   } else {
4113     nis = 0;
4114     nisdofs = 0;
4115     nisneu = 0;
4116     isarray = NULL;
4117   }
4118   /* destroy no longer needed map */
4119   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4120 
4121   /* restrict on coarse candidates (if needed) */
4122   coarse_mat_is = NULL;
4123   if (csin) {
4124     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4125       if (redist) {
4126         PetscMPIInt rank;
4127         PetscInt    spc,n_spc_p1,dest[1],destsize;
4128 
4129         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4130         spc = active_procs/ncoarse;
4131         n_spc_p1 = active_procs%ncoarse;
4132         if (im_active) {
4133           destsize = 1;
4134           if (rank > n_spc_p1*(spc+1)-1) {
4135             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4136           } else {
4137             dest[0] = rank/(spc+1);
4138           }
4139         } else {
4140           destsize = 0;
4141         }
4142         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4143       } else if (csin_type_simple) {
4144         PetscMPIInt rank;
4145         PetscInt    issize,isidx;
4146 
4147         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4148         if (im_active) {
4149           issize = 1;
4150           isidx = (PetscInt)rank;
4151         } else {
4152           issize = 0;
4153           isidx = -1;
4154         }
4155         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4156       } else { /* get a suitable subassembling pattern from MATIS code */
4157         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4158       }
4159 
4160       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4161       if (!redist || ncoarse <= void_procs) {
4162         PetscInt ncoarse_cand,tissize,*nisindices;
4163         PetscInt *coarse_candidates;
4164         const PetscInt* tisindices;
4165 
4166         /* get coarse candidates' ranks in pc communicator */
4167         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4168         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4169         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4170           if (!coarse_candidates[i]) {
4171             coarse_candidates[ncoarse_cand++]=i;
4172           }
4173         }
4174         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4175 
4176 
4177         if (pcbddc->dbg_flag) {
4178           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4179           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4180           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4181           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4182           for (i=0;i<ncoarse_cand;i++) {
4183             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4184           }
4185           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4186           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4187         }
4188         /* shift the pattern on coarse candidates */
4189         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4190         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4191         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4192         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4193         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4194         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4195         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4196       }
4197       if (pcbddc->dbg_flag) {
4198         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4199         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4200         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4201         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4202       }
4203     }
4204     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4205     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4206   } else {
4207     if (pcbddc->dbg_flag) {
4208       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4209       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4210       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4211     }
4212     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4213     coarse_mat_is = t_coarse_mat_is;
4214   }
4215 
4216   /* create local to global scatters for coarse problem */
4217   if (compute_vecs) {
4218     PetscInt lrows;
4219     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4220     if (coarse_mat_is) {
4221       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4222     } else {
4223       lrows = 0;
4224     }
4225     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4226     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4227     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4228     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4229     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4230   }
4231   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4232   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4233 
4234   /* set defaults for coarse KSP and PC */
4235   if (multilevel_allowed) {
4236     coarse_ksp_type = KSPRICHARDSON;
4237     coarse_pc_type = PCBDDC;
4238   } else {
4239     coarse_ksp_type = KSPPREONLY;
4240     coarse_pc_type = PCREDUNDANT;
4241   }
4242 
4243   /* print some info if requested */
4244   if (pcbddc->dbg_flag) {
4245     if (!multilevel_allowed) {
4246       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4247       if (multilevel_requested) {
4248         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4249       } else if (pcbddc->max_levels) {
4250         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4251       }
4252       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4253     }
4254   }
4255 
4256   /* create the coarse KSP object only once with defaults */
4257   if (coarse_mat_is) {
4258     MatReuse coarse_mat_reuse;
4259     PetscViewer dbg_viewer = NULL;
4260     if (pcbddc->dbg_flag) {
4261       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4262       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4263     }
4264     if (!pcbddc->coarse_ksp) {
4265       char prefix[256],str_level[16];
4266       size_t len;
4267       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4268       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4269       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4270       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4271       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4272       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4273       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4274       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4275       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4276       /* prefix */
4277       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4278       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4279       if (!pcbddc->current_level) {
4280         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4281         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4282       } else {
4283         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4284         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4285         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4286         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4287         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4288         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4289       }
4290       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4291       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4292       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4293       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4294       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4295       /* allow user customization */
4296       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4297     }
4298     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4299     if (nisdofs) {
4300       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4301       for (i=0;i<nisdofs;i++) {
4302         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4303       }
4304     }
4305     if (nisneu) {
4306       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4307       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4308     }
4309 
4310     /* get some info after set from options */
4311     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4312     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4313     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4314     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4315     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4316       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4317       isbddc = PETSC_FALSE;
4318     }
4319     if (isredundant) {
4320       KSP inner_ksp;
4321       PC inner_pc;
4322       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4323       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4324       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4325     }
4326 
4327     /* assemble coarse matrix */
4328     if (coarse_reuse) {
4329       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4330       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4331       coarse_mat_reuse = MAT_REUSE_MATRIX;
4332     } else {
4333       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4334     }
4335     if (isbddc || isnn) {
4336       if (pcbddc->coarsening_ratio > 1) {
4337         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4338           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4339           if (pcbddc->dbg_flag) {
4340             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4341             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4342             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4343             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4344           }
4345         }
4346         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4347       } else {
4348         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4349         coarse_mat = coarse_mat_is;
4350       }
4351     } else {
4352       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4353     }
4354     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4355 
4356     /* propagate symmetry info to coarse matrix */
4357     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4358     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4359 
4360     /* set operators */
4361     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4362     if (pcbddc->dbg_flag) {
4363       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4364     }
4365   } else { /* processes non partecipating to coarse solver (if any) */
4366     coarse_mat = 0;
4367   }
4368   ierr = PetscFree(isarray);CHKERRQ(ierr);
4369 #if 0
4370   {
4371     PetscViewer viewer;
4372     char filename[256];
4373     sprintf(filename,"coarse_mat.m");
4374     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4375     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4376     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4377     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4378   }
4379 #endif
4380 
4381   /* Compute coarse null space (special handling by BDDC only) */
4382   if (pcbddc->NullSpace) {
4383     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4384   }
4385 
4386   if (pcbddc->coarse_ksp) {
4387     Vec crhs,csol;
4388     PetscBool ispreonly;
4389     if (CoarseNullSpace) {
4390       if (isbddc) {
4391         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4392       } else {
4393         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4394       }
4395     }
4396     /* setup coarse ksp */
4397     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4398     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4399     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4400     /* hack */
4401     if (!csol) {
4402       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4403     }
4404     if (!crhs) {
4405       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4406     }
4407     /* Check coarse problem if in debug mode or if solving with an iterative method */
4408     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4409     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4410       KSP       check_ksp;
4411       KSPType   check_ksp_type;
4412       PC        check_pc;
4413       Vec       check_vec,coarse_vec;
4414       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4415       PetscInt  its;
4416       PetscBool compute_eigs;
4417       PetscReal *eigs_r,*eigs_c;
4418       PetscInt  neigs;
4419       const char *prefix;
4420 
4421       /* Create ksp object suitable for estimation of extreme eigenvalues */
4422       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4423       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4424       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4425       if (ispreonly) {
4426         check_ksp_type = KSPPREONLY;
4427         compute_eigs = PETSC_FALSE;
4428       } else {
4429         check_ksp_type = KSPGMRES;
4430         compute_eigs = PETSC_TRUE;
4431       }
4432       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4433       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4434       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4435       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4436       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4437       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4438       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4439       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4440       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4441       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4442       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4443       /* create random vec */
4444       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4445       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4446       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4447       if (CoarseNullSpace) {
4448         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4449       }
4450       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4451       /* solve coarse problem */
4452       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4453       if (CoarseNullSpace) {
4454         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4455       }
4456       /* set eigenvalue estimation if preonly has not been requested */
4457       if (compute_eigs) {
4458         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4459         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4460         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4461         lambda_max = eigs_r[neigs-1];
4462         lambda_min = eigs_r[0];
4463         if (pcbddc->use_coarse_estimates) {
4464           if (lambda_max>lambda_min) {
4465             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4466             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4467           }
4468         }
4469       }
4470 
4471       /* check coarse problem residual error */
4472       if (pcbddc->dbg_flag) {
4473         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4474         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4475         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4476         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4477         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4478         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4479         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4480         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4481         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4482         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4483         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4484         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4485         if (compute_eigs) {
4486           PetscReal lambda_max_s,lambda_min_s;
4487           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4488           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4489           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4490           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4491           for (i=0;i<neigs;i++) {
4492             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4493           }
4494         }
4495         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4496         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4497       }
4498       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4499       if (compute_eigs) {
4500         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4501         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4502       }
4503     }
4504   }
4505   /* print additional info */
4506   if (pcbddc->dbg_flag) {
4507     /* waits until all processes reaches this point */
4508     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4509     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4510     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4511   }
4512 
4513   /* free memory */
4514   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4515   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4516   PetscFunctionReturn(0);
4517 }
4518 
4519 #undef __FUNCT__
4520 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4521 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4522 {
4523   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4524   PC_IS*         pcis = (PC_IS*)pc->data;
4525   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4526   PetscInt       i,local_size,coarse_size=0;
4527   PetscInt       *local_primal_indices=NULL;
4528   PetscInt       *t_local_primal_indices=NULL;
4529   PetscErrorCode ierr;
4530 
4531   PetscFunctionBegin;
4532   /* Compute global number of coarse dofs */
4533   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4534     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4535   }
4536   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr);
4537   ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr);
4538   local_size = 0;
4539   for (i=0;i<pcbddc->local_primal_size_cc;i++) {
4540     PetscInt j;
4541     for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j;
4542   }
4543   ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr);
4544   if (local_size != pcbddc->local_primal_size) {
4545     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4546   }
4547 
4548   /* check numbering */
4549   if (pcbddc->dbg_flag) {
4550     PetscScalar coarsesum,*array;
4551     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4552 
4553     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4554     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4555     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4556     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4557     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4558     for (i=0;i<pcbddc->local_primal_size;i++) {
4559       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4560     }
4561     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4562     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4563     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4564     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4565     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4566     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4567     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4568     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4569     for (i=0;i<pcis->n;i++) {
4570       if (array[i] == 1.0) {
4571         set_error = PETSC_TRUE;
4572         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4573       }
4574     }
4575     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4576     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4577     for (i=0;i<pcis->n;i++) {
4578       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4579     }
4580     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4581     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4582     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4583     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4584     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4585     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4586     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4587       PetscInt *gidxs;
4588 
4589       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4590       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4591       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4592       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4593       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4594       for (i=0;i<pcbddc->local_primal_size;i++) {
4595         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4596       }
4597       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4598       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4599     }
4600     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4601     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4602   }
4603   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4604   /* get back data */
4605   *coarse_size_n = coarse_size;
4606   *local_primal_indices_n = local_primal_indices;
4607   PetscFunctionReturn(0);
4608 }
4609 
4610 #undef __FUNCT__
4611 #define __FUNCT__ "PCBDDCGlobalToLocal"
4612 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4613 {
4614   IS             localis_t;
4615   PetscInt       i,lsize,*idxs,n;
4616   PetscScalar    *vals;
4617   PetscErrorCode ierr;
4618 
4619   PetscFunctionBegin;
4620   /* get indices in local ordering exploiting local to global map */
4621   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4622   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4623   for (i=0;i<lsize;i++) vals[i] = 1.0;
4624   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4625   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4626   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4627   if (idxs) { /* multilevel guard */
4628     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4629   }
4630   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4631   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4632   ierr = PetscFree(vals);CHKERRQ(ierr);
4633   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4634   /* now compute set in local ordering */
4635   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4636   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4637   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4638   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4639   for (i=0,lsize=0;i<n;i++) {
4640     if (PetscRealPart(vals[i]) > 0.5) {
4641       lsize++;
4642     }
4643   }
4644   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4645   for (i=0,lsize=0;i<n;i++) {
4646     if (PetscRealPart(vals[i]) > 0.5) {
4647       idxs[lsize++] = i;
4648     }
4649   }
4650   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4651   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4652   *localis = localis_t;
4653   PetscFunctionReturn(0);
4654 }
4655 
4656 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4657 #undef __FUNCT__
4658 #define __FUNCT__ "PCBDDCMatMult_Private"
4659 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4660 {
4661   PCBDDCChange_ctx change_ctx;
4662   PetscErrorCode   ierr;
4663 
4664   PetscFunctionBegin;
4665   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4666   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4667   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4668   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4669   PetscFunctionReturn(0);
4670 }
4671 
4672 #undef __FUNCT__
4673 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4674 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4675 {
4676   PCBDDCChange_ctx change_ctx;
4677   PetscErrorCode   ierr;
4678 
4679   PetscFunctionBegin;
4680   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4681   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4682   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4683   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4684   PetscFunctionReturn(0);
4685 }
4686 
4687 #undef __FUNCT__
4688 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4689 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4690 {
4691   PC_IS               *pcis=(PC_IS*)pc->data;
4692   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4693   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4694   Mat                 S_j;
4695   PetscInt            *used_xadj,*used_adjncy;
4696   PetscBool           free_used_adj;
4697   PetscErrorCode      ierr;
4698 
4699   PetscFunctionBegin;
4700   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4701   free_used_adj = PETSC_FALSE;
4702   if (pcbddc->sub_schurs_layers == -1) {
4703     used_xadj = NULL;
4704     used_adjncy = NULL;
4705   } else {
4706     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4707       used_xadj = pcbddc->mat_graph->xadj;
4708       used_adjncy = pcbddc->mat_graph->adjncy;
4709     } else if (pcbddc->computed_rowadj) {
4710       used_xadj = pcbddc->mat_graph->xadj;
4711       used_adjncy = pcbddc->mat_graph->adjncy;
4712     } else {
4713       PetscBool      flg_row=PETSC_FALSE;
4714       const PetscInt *xadj,*adjncy;
4715       PetscInt       nvtxs;
4716 
4717       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4718       if (flg_row) {
4719         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4720         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4721         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4722         free_used_adj = PETSC_TRUE;
4723       } else {
4724         pcbddc->sub_schurs_layers = -1;
4725         used_xadj = NULL;
4726         used_adjncy = NULL;
4727       }
4728       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4729     }
4730   }
4731 
4732   /* setup sub_schurs data */
4733   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4734   if (!sub_schurs->use_mumps) {
4735     /* pcbddc->ksp_D up to date only if not using MUMPS */
4736     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4737     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4738   } else {
4739     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4740     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4741   }
4742   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4743 
4744   /* free adjacency */
4745   if (free_used_adj) {
4746     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4747   }
4748   PetscFunctionReturn(0);
4749 }
4750 
4751 #undef __FUNCT__
4752 #define __FUNCT__ "PCBDDCInitSubSchurs"
4753 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4754 {
4755   PC_IS               *pcis=(PC_IS*)pc->data;
4756   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4757   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4758   PCBDDCGraph         graph;
4759   PetscErrorCode      ierr;
4760 
4761   PetscFunctionBegin;
4762   /* attach interface graph for determining subsets */
4763   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4764     IS verticesIS;
4765 
4766     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4767     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4768     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4769     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4770     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4771     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4772 /*
4773     if (pcbddc->dbg_flag) {
4774       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4775     }
4776 */
4777   } else {
4778     graph = pcbddc->mat_graph;
4779   }
4780 
4781   /* sub_schurs init */
4782   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4783 
4784   /* free graph struct */
4785   if (pcbddc->sub_schurs_rebuild) {
4786     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4787   }
4788   PetscFunctionReturn(0);
4789 }
4790