xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 3301b35fa709e6765b4611c958433aae76439c43)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size;
76       cum2 += subset_size*n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
112     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125                       nv+cum,&pcbddc->adaptive_constraints_idxs,
126                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
127   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129   maxneigs = 0;
130   cum = cum2 = cumarray = 0;
131   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132   pcbddc->adaptive_constraints_data_ptr[0] = 0;
133   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
134     const PetscInt *idxs;
135 
136     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
137     for (cum=0;cum<nv;cum++) {
138       pcbddc->adaptive_constraints_n[cum] = 1;
139       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140       pcbddc->adaptive_constraints_data[cum] = 1.0;
141       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143     }
144     cum2 = cum;
145     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
146   }
147 
148   if (mss) { /* multilevel */
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
150     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
151   }
152 
153   for (i=0;i<sub_schurs->n_subs;i++) {
154     PetscInt j,subset_size;
155 
156     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
157     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
158       const PetscInt *idxs;
159       PetscReal      infty = PETSC_MAX_REAL;
160       PetscInt       eigs_start = 0;
161       PetscBLASInt   B_N;
162       PetscBool      same_data = PETSC_FALSE;
163 
164       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
165       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
166         if (sub_schurs->is_hermitian) {
167           PetscInt j,k;
168           if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
169             ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
170             ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
171           }
172           for (j=0;j<subset_size;j++) {
173             for (k=j;k<subset_size;k++) {
174               S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
175               St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
176             }
177           }
178         } else {
179           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
180           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
181         }
182       } else {
183         S = Sarray + cumarray;
184         St = Starray + cumarray;
185       }
186 
187       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
188       /* see if we can save some work */
189       if (sub_schurs->n_subs == 1) {
190         ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
191       }
192 
193       if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
194         B_neigs = 0;
195       } else {
196         /* Threshold: this is an heuristic for edges */
197         thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
198 
199         if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
200           PetscBLASInt B_itype = 1;
201           PetscBLASInt B_IL, B_IU;
202           PetscReal    eps = -1.0; /* dlamch? */
203           PetscInt     nmin_s;
204 
205           /* ask for eigenvalues larger than thresh */
206           if (pcbddc->dbg_flag) {
207             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
208           }
209           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
210 #if defined(PETSC_USE_COMPLEX)
211           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
212 #else
213           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
214 #endif
215           ierr = PetscFPTrapPop();CHKERRQ(ierr);
216           if (B_ierr) {
217             if (B_ierr < 0 ) {
218               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
219             } else if (B_ierr <= B_N) {
220               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
221             } else {
222               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
223             }
224           }
225 
226           if (B_neigs > nmax) {
227             if (pcbddc->dbg_flag) {
228               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
229             }
230             eigs_start = B_neigs -nmax;
231             B_neigs = nmax;
232           }
233 
234           nmin_s = PetscMin(nmin,B_N);
235           if (B_neigs < nmin_s) {
236             PetscBLASInt B_neigs2;
237 
238             B_IU = B_N - B_neigs;
239             B_IL = B_N - nmin_s + 1;
240             if (pcbddc->dbg_flag) {
241               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
242             }
243             if (sub_schurs->is_hermitian) {
244               PetscInt j;
245               for (j=0;j<subset_size;j++) {
246                 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
247               }
248               for (j=0;j<subset_size;j++) {
249                 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
250               }
251             } else {
252               ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
253               ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
254             }
255             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
256 #if defined(PETSC_USE_COMPLEX)
257             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
258 #else
259             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
260 #endif
261             ierr = PetscFPTrapPop();CHKERRQ(ierr);
262             B_neigs += B_neigs2;
263           }
264           if (B_ierr) {
265             if (B_ierr < 0 ) {
266               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
267             } else if (B_ierr <= B_N) {
268               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
269             } else {
270               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
271             }
272           }
273           if (pcbddc->dbg_flag) {
274             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
275             for (j=0;j<B_neigs;j++) {
276               if (eigs[j] == 0.0) {
277                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
278               } else {
279                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
280               }
281             }
282           }
283         } else {
284             /* TODO */
285         }
286       }
287       maxneigs = PetscMax(B_neigs,maxneigs);
288       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
289       if (B_neigs) {
290         ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
291 
292         if (pcbddc->dbg_flag > 1) {
293           PetscInt ii;
294           for (ii=0;ii<B_neigs;ii++) {
295             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
296             for (j=0;j<B_N;j++) {
297               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
298             }
299           }
300         }
301 #if 0
302         for (j=0;j<B_neigs;j++) {
303           PetscBLASInt Blas_N,Blas_one = 1.0;
304           PetscScalar norm;
305           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
306           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
307                                                      &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
308           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
309             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
310           } else {
311             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
312           }
313           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
314         }
315 #endif
316         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
317         pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
318         pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
319         cum++;
320       }
321       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
322     }
323     /* shift for next computation */
324     cumarray += subset_size*subset_size;
325   }
326   if (pcbddc->dbg_flag) {
327     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
328   }
329 
330   if (mss) {
331     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
332     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
333     /* destroy matrices (junk) */
334     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
335     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
336   }
337   if (allocated_S_St) {
338     ierr = PetscFree2(S,St);CHKERRQ(ierr);
339   }
340   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
341 #if defined(PETSC_USE_COMPLEX)
342   ierr = PetscFree(rwork);CHKERRQ(ierr);
343 #endif
344   if (pcbddc->dbg_flag) {
345     PetscInt maxneigs_r;
346     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
347     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
348   }
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "PCBDDCSetUpSolvers"
354 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
355 {
356   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
357   PetscScalar    *coarse_submat_vals;
358   PetscErrorCode ierr;
359 
360   PetscFunctionBegin;
361   /* Setup local scatters R_to_B and (optionally) R_to_D */
362   /* PCBDDCSetUpLocalWorkVectors should be called first! */
363   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
364 
365   /* Setup local neumann solver ksp_R */
366   /* PCBDDCSetUpLocalScatters should be called first! */
367   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
368 
369   /* Change global null space passed in by the user if change of basis has been requested */
370   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
371     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
372   }
373 
374   /*
375      Setup local correction and local part of coarse basis.
376      Gives back the dense local part of the coarse matrix in column major ordering
377   */
378   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
379 
380   /* Compute total number of coarse nodes and setup coarse solver */
381   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
382 
383   /* free */
384   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "PCBDDCResetCustomization"
390 PetscErrorCode PCBDDCResetCustomization(PC pc)
391 {
392   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
393   PetscErrorCode ierr;
394 
395   PetscFunctionBegin;
396   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
397   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
398   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
399   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
400   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
401   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
402   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
403   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
404   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
405   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
406   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "PCBDDCResetTopography"
412 PetscErrorCode PCBDDCResetTopography(PC pc)
413 {
414   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
415   PetscErrorCode ierr;
416 
417   PetscFunctionBegin;
418   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
419   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
420   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
421   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
422   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "PCBDDCResetSolvers"
428 PetscErrorCode PCBDDCResetSolvers(PC pc)
429 {
430   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
431   PetscScalar    *array;
432   PetscErrorCode ierr;
433 
434   PetscFunctionBegin;
435   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
436   if (pcbddc->coarse_phi_B) {
437     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
438     ierr = PetscFree(array);CHKERRQ(ierr);
439   }
440   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
441   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
442   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
443   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
444   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
445   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
446   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
447   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
448   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
449   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
450   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
451   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
452   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
453   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
454   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
455   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
456   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
457   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
458   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
459   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
460   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
461   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
462   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
463   PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
468 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
469 {
470   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
471   PC_IS          *pcis = (PC_IS*)pc->data;
472   VecType        impVecType;
473   PetscInt       n_constraints,n_R,old_size;
474   PetscErrorCode ierr;
475 
476   PetscFunctionBegin;
477   if (!pcbddc->ConstraintMatrix) {
478     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
479   }
480   /* get sizes */
481   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
482   n_R = pcis->n-pcbddc->n_vertices;
483   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
484   /* local work vectors (try to avoid unneeded work)*/
485   /* R nodes */
486   old_size = -1;
487   if (pcbddc->vec1_R) {
488     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
489   }
490   if (n_R != old_size) {
491     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
492     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
493     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
494     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
495     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
496     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
497   }
498   /* local primal dofs */
499   old_size = -1;
500   if (pcbddc->vec1_P) {
501     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
502   }
503   if (pcbddc->local_primal_size != old_size) {
504     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
505     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
506     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
507     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
508   }
509   /* local explicit constraints */
510   old_size = -1;
511   if (pcbddc->vec1_C) {
512     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
513   }
514   if (n_constraints && n_constraints != old_size) {
515     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
516     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
517     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
518     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
519   }
520   PetscFunctionReturn(0);
521 }
522 
523 #undef __FUNCT__
524 #define __FUNCT__ "PCBDDCSetUpCorrection"
525 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
526 {
527   PetscErrorCode  ierr;
528   /* pointers to pcis and pcbddc */
529   PC_IS*          pcis = (PC_IS*)pc->data;
530   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
531   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
532   /* submatrices of local problem */
533   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
534   /* submatrices of local coarse problem */
535   Mat             S_VV,S_CV,S_VC,S_CC;
536   /* working matrices */
537   Mat             C_CR;
538   /* additional working stuff */
539   PC              pc_R;
540   Mat             F;
541   PetscBool       isLU,isCHOL,isILU;
542 
543   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
544   PetscScalar     *work;
545   PetscInt        *idx_V_B;
546   PetscInt        n,n_vertices,n_constraints;
547   PetscInt        i,n_R,n_D,n_B;
548   PetscBool       unsymmetric_check;
549   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
550   MatType         impMatType;
551   /* some shortcuts to scalars */
552   PetscScalar     one=1.0,m_one=-1.0;
553 
554   PetscFunctionBegin;
555   n_vertices = pcbddc->n_vertices;
556   n_constraints = pcbddc->local_primal_size-n_vertices;
557   /* Set Non-overlapping dimensions */
558   n_B = pcis->n_B;
559   n_D = pcis->n - n_B;
560   n_R = pcis->n - n_vertices;
561 
562   /* Set types for local objects needed by BDDC precondtioner */
563   impMatType = MATSEQDENSE;
564 
565   /* vertices in boundary numbering */
566   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
567   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
568   if (i != n_vertices) {
569     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
570   }
571 
572   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
573   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
574   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
575   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
576   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
577   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
578   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
579   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
580   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
581   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
582 
583   unsymmetric_check = PETSC_FALSE;
584   /* allocate workspace */
585   n = 0;
586   if (n_constraints) {
587     n += n_R*n_constraints;
588   }
589   if (n_vertices) {
590     n = PetscMax(2*n_R*n_vertices,n);
591     n = PetscMax((n_R+n_B)*n_vertices,n);
592   }
593   if (!pcbddc->symmetric_primal) {
594     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
595     unsymmetric_check = PETSC_TRUE;
596   }
597   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
598 
599   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
600   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
601   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
602   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
603   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
604   if (isLU || isILU || isCHOL) {
605     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
606   } else if (sub_schurs->reuse_mumps) {
607     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
608     MatFactorType type;
609 
610     F = reuse_mumps->F;
611     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
612     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
613   } else {
614     F = NULL;
615   }
616 
617   /* Precompute stuffs needed for preprocessing and application of BDDC*/
618   if (n_constraints) {
619     Mat         M1,M2,M3;
620     Mat         auxmat;
621     IS          is_aux;
622     PetscScalar *array,*array2;
623 
624     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
625     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
626 
627     /* Extract constraints on R nodes: C_{CR}  */
628     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
629     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
630     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
631 
632     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
633     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
634     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
635     for (i=0;i<n_constraints;i++) {
636       const PetscScalar *row_cmat_values;
637       const PetscInt    *row_cmat_indices;
638       PetscInt          size_of_constraint,j;
639 
640       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
641       for (j=0;j<size_of_constraint;j++) {
642         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
643       }
644       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
645     }
646     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
647     if (F) {
648       Mat B;
649 
650       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
651       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
652       ierr = MatDestroy(&B);CHKERRQ(ierr);
653     } else {
654       PetscScalar *marr;
655 
656       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
657       for (i=0;i<n_constraints;i++) {
658         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
659         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
660         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
661         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
662         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
663       }
664       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
665     }
666     if (!pcbddc->switch_static) {
667       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
668       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
669       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
670       for (i=0;i<n_constraints;i++) {
671         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
672         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
673         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
674         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
675         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
676         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
677       }
678       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
679       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
680       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
681     } else {
682       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
683       pcbddc->local_auxmat2 = local_auxmat2_R;
684       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
685     }
686     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
687     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
688     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
689     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
690     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
691     if (isCHOL) {
692       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
693     } else {
694       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
695     }
696     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
697     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
698     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
699     ierr = MatDestroy(&M2);CHKERRQ(ierr);
700     ierr = MatDestroy(&M3);CHKERRQ(ierr);
701     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
702     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
703     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
704     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
705     ierr = MatDestroy(&M1);CHKERRQ(ierr);
706   }
707   /* Get submatrices from subdomain matrix */
708   if (n_vertices) {
709     Mat       newmat;
710     IS        is_aux;
711     PetscInt  ibs,mbs;
712     PetscBool issbaij;
713 
714     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
715       IS tis;
716 
717       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
718       ierr = ISSort(tis);CHKERRQ(ierr);
719       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
720       ierr = ISDestroy(&tis);CHKERRQ(ierr);
721     } else {
722       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
723     }
724     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
725     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
726     if (ibs != mbs) { /* need to convert to SEQAIJ */
727       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
728       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
729       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
730       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
731       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
732       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
733     } else {
734       /* this is safe */
735       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
736       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
737       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
738         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
739         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
740         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
741         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
742         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
743         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
744       } else {
745         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
746         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
747         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
748       }
749       /* need to convert to SeqAIJ for MatMatMult with dense matrix */
750       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
751     }
752     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
753   }
754 
755   /* Matrix of coarse basis functions (local) */
756   if (pcbddc->coarse_phi_B) {
757     PetscInt on_B,on_primal,on_D=n_D;
758     if (pcbddc->coarse_phi_D) {
759       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
760     }
761     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
762     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
763       PetscScalar *marray;
764 
765       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
766       ierr = PetscFree(marray);CHKERRQ(ierr);
767       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
768       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
769       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
770       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
771     }
772   }
773 
774   if (!pcbddc->coarse_phi_B) {
775     PetscScalar *marray;
776 
777     n = n_B*pcbddc->local_primal_size;
778     if (pcbddc->switch_static || pcbddc->dbg_flag) {
779       n += n_D*pcbddc->local_primal_size;
780     }
781     if (!pcbddc->symmetric_primal) {
782       n *= 2;
783     }
784     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
785     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
786     n = n_B*pcbddc->local_primal_size;
787     if (pcbddc->switch_static || pcbddc->dbg_flag) {
788       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
789       n += n_D*pcbddc->local_primal_size;
790     }
791     if (!pcbddc->symmetric_primal) {
792       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
793       if (pcbddc->switch_static || pcbddc->dbg_flag) {
794         n = n_B*pcbddc->local_primal_size;
795         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
796       }
797     } else {
798       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
799       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
800       if (pcbddc->switch_static || pcbddc->dbg_flag) {
801         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
802         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
803       }
804     }
805   }
806   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
807   /* vertices */
808   if (n_vertices) {
809 
810     if (n_R) {
811       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
812       PetscBLASInt B_N,B_one = 1;
813       PetscScalar  *x,*y;
814 
815       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
816       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
817       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
818       if (F) { /* TODO could be optimized for symmetric problems */
819         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
820       } else {
821         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
822         for (i=0;i<n_vertices;i++) {
823           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
824           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
825           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
826           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
827           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
828         }
829         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
830       }
831       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
832       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
833       if (n_constraints) {
834         Mat B;
835 
836         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
837         for (i=0;i<n_vertices;i++) {
838           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
839           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
840           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
841           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
842           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
843           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
844         }
845         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
846         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
847         ierr = MatDestroy(&B);CHKERRQ(ierr);
848         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
849         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
850         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
851         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
852         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
853         ierr = MatDestroy(&B);CHKERRQ(ierr);
854       }
855       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
856       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
857       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
858       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
859       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
860       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
861       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
862       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
863       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
864       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
865     } else {
866       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
867     }
868     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
869     /* coarse basis functions */
870     for (i=0;i<n_vertices;i++) {
871       PetscScalar *y;
872 
873       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
874       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
875       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
876       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878       y[n_B*i+idx_V_B[i]] = 1.0;
879       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
880       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
881 
882       if (pcbddc->switch_static || pcbddc->dbg_flag) {
883         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
884         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
885         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
888         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
889       }
890       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
891     }
892   }
893 
894   if (n_constraints) {
895     Mat B;
896 
897     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
898     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
899     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
900     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
901     if (n_vertices) {
902       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
903         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
904       } else {
905         Mat S_VCt;
906 
907         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
908         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
909         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
910       }
911     }
912     ierr = MatDestroy(&B);CHKERRQ(ierr);
913     /* coarse basis functions */
914     for (i=0;i<n_constraints;i++) {
915       PetscScalar *y;
916 
917       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
918       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
919       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
920       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
922       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
923       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
924       if (pcbddc->switch_static || pcbddc->dbg_flag) {
925         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
926         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
927         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
929         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
930         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
931       }
932       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
933     }
934   }
935   if (n_constraints) {
936     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
937   }
938 
939   /* compute other basis functions for non-symmetric problems */
940   if (!pcbddc->symmetric_primal) {
941 
942     if (n_constraints) {
943       Mat S_CCT,B_C;
944 
945       /* this is a lazy thing */
946       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
947       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
948       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
949       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
950       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
951       if (n_vertices) {
952         Mat B_V,S_VCT;
953 
954         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
955         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
956         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
957         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
958         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
959       }
960       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
961     } else { /* if there are no constraints, reset work */
962       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
963     }
964     if (n_vertices && n_R) {
965       Mat          A_VRT;
966       PetscScalar  *marray;
967       PetscBLASInt B_N,B_one = 1;
968 
969       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
970       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
971       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
972       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
973       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
974       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
975       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
976     }
977 
978     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
979       for (i=0;i<pcbddc->local_primal_size;i++) {
980         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
981         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
982         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
983         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
984         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
985       }
986     } else {
987       for (i=0;i<pcbddc->local_primal_size;i++) {
988         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
989         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
990         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
991         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
992         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
993       }
994     }
995     /* coarse basis functions */
996     for (i=0;i<pcbddc->local_primal_size;i++) {
997       PetscScalar *y;
998 
999       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1000       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1001       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1002       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1003       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004       if (i<n_vertices) {
1005         y[n_B*i+idx_V_B[i]] = 1.0;
1006       }
1007       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1008       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1009 
1010       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1011         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1012         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1013         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1015         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1016         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1017       }
1018       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1019     }
1020   }
1021   /* free memory */
1022   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1023   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1024   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1025   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1026   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1027   ierr = PetscFree(work);CHKERRQ(ierr);
1028   if (n_vertices) {
1029     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1030   }
1031   if (n_constraints) {
1032     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1033   }
1034   /* Checking coarse_sub_mat and coarse basis functios */
1035   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1036   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1037   if (pcbddc->dbg_flag) {
1038     Mat         coarse_sub_mat;
1039     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1040     Mat         coarse_phi_D,coarse_phi_B;
1041     Mat         coarse_psi_D,coarse_psi_B;
1042     Mat         A_II,A_BB,A_IB,A_BI;
1043     Mat         C_B,CPHI;
1044     IS          is_dummy;
1045     Vec         mones;
1046     MatType     checkmattype=MATSEQAIJ;
1047     PetscReal   real_value;
1048 
1049     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1050     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1051     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1052     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1053     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1054     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1055     if (unsymmetric_check) {
1056       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1057       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1058     }
1059     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1060 
1061     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1062     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1063     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1064     if (unsymmetric_check) {
1065       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1066       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1067       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1068       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1069       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1070       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1071       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1072       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1073       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1074       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1075       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1076       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1077     } else {
1078       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1079       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1080       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1081       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1082       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1083       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1084       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1085       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1086     }
1087     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1088     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1089     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1090     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1091     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1092     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1093     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1094     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1095 
1096     /* check constraints */
1097     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1098     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1099     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1100     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1101     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1102     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1103     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1104     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1105     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1106     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1107     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1108     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1109 
1110     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1111     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1112     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1113     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1114     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1115     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1116     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1117     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1118     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1119     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1120     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1121     if (unsymmetric_check) {
1122       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1123       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1124     }
1125     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1126   }
1127   /* get back data */
1128   *coarse_submat_vals_n = coarse_submat_vals;
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 #undef __FUNCT__
1133 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1134 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1135 {
1136   Mat            *work_mat;
1137   IS             isrow_s,iscol_s;
1138   PetscBool      rsorted,csorted;
1139   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1144   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1145   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1146   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1147 
1148   if (!rsorted) {
1149     const PetscInt *idxs;
1150     PetscInt *idxs_sorted,i;
1151 
1152     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1153     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1154     for (i=0;i<rsize;i++) {
1155       idxs_perm_r[i] = i;
1156     }
1157     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1158     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1159     for (i=0;i<rsize;i++) {
1160       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1161     }
1162     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1163     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1164   } else {
1165     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1166     isrow_s = isrow;
1167   }
1168 
1169   if (!csorted) {
1170     if (isrow == iscol) {
1171       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1172       iscol_s = isrow_s;
1173     } else {
1174       const PetscInt *idxs;
1175       PetscInt *idxs_sorted,i;
1176 
1177       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1178       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1179       for (i=0;i<csize;i++) {
1180         idxs_perm_c[i] = i;
1181       }
1182       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1183       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1184       for (i=0;i<csize;i++) {
1185         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1186       }
1187       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1188       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1189     }
1190   } else {
1191     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1192     iscol_s = iscol;
1193   }
1194 
1195   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1196 
1197   if (!rsorted || !csorted) {
1198     Mat      new_mat;
1199     IS       is_perm_r,is_perm_c;
1200 
1201     if (!rsorted) {
1202       PetscInt *idxs_r,i;
1203       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1204       for (i=0;i<rsize;i++) {
1205         idxs_r[idxs_perm_r[i]] = i;
1206       }
1207       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1208       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1209     } else {
1210       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1211     }
1212     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1213 
1214     if (!csorted) {
1215       if (isrow_s == iscol_s) {
1216         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1217         is_perm_c = is_perm_r;
1218       } else {
1219         PetscInt *idxs_c,i;
1220         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1221         for (i=0;i<csize;i++) {
1222           idxs_c[idxs_perm_c[i]] = i;
1223         }
1224         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1225         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1226       }
1227     } else {
1228       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1229     }
1230     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1231 
1232     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1233     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1234     work_mat[0] = new_mat;
1235     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1236     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1237   }
1238 
1239   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1240   *B = work_mat[0];
1241   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1242   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1243   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 #undef __FUNCT__
1248 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1249 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1250 {
1251   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1252   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1253   Mat            new_mat;
1254   IS             is_local,is_global;
1255   PetscInt       local_size;
1256   PetscBool      isseqaij;
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1261   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1262   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1263   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1264   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1265   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1266   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1267 
1268   /* check */
1269   if (pcbddc->dbg_flag) {
1270     Vec       x,x_change;
1271     PetscReal error;
1272 
1273     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1274     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1275     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1276     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1277     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1278     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1279     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1280     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1281     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1282     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1283     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1284     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1285     ierr = VecDestroy(&x);CHKERRQ(ierr);
1286     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1287   }
1288 
1289   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1290   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1291   if (isseqaij) {
1292     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1293   } else {
1294     Mat work_mat;
1295     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1296     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1297     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1298   }
1299   if (matis->A->symmetric_set) {
1300     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1301 #if !defined(PETSC_USE_COMPLEX)
1302     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1303 #endif
1304   }
1305   /*
1306   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1307   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1308   */
1309   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1315 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1316 {
1317   PC_IS*          pcis = (PC_IS*)(pc->data);
1318   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1319   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1320   PetscInt        *idx_R_local=NULL;
1321   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1322   PetscInt        vbs,bs;
1323   PetscBT         bitmask=NULL;
1324   PetscErrorCode  ierr;
1325 
1326   PetscFunctionBegin;
1327   /*
1328     No need to setup local scatters if
1329       - primal space is unchanged
1330         AND
1331       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1332         AND
1333       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1334   */
1335   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1336     PetscFunctionReturn(0);
1337   }
1338   /* destroy old objects */
1339   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1340   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1341   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1342   /* Set Non-overlapping dimensions */
1343   n_B = pcis->n_B;
1344   n_D = pcis->n - n_B;
1345   n_vertices = pcbddc->n_vertices;
1346 
1347   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1348 
1349   /* create auxiliary bitmask and allocate workspace */
1350   if (!sub_schurs->reuse_mumps) {
1351     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1352     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1353     for (i=0;i<n_vertices;i++) {
1354       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1355     }
1356 
1357     for (i=0, n_R=0; i<pcis->n; i++) {
1358       if (!PetscBTLookup(bitmask,i)) {
1359         idx_R_local[n_R++] = i;
1360       }
1361     }
1362   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1363     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1364 
1365     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1366     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1367   }
1368 
1369   /* Block code */
1370   vbs = 1;
1371   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1372   if (bs>1 && !(n_vertices%bs)) {
1373     PetscBool is_blocked = PETSC_TRUE;
1374     PetscInt  *vary;
1375     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1376     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1377     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1378     for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1379     for (i=0; i<n_vertices/bs; i++) {
1380       if (vary[i]!=0 && vary[i]!=bs) {
1381         is_blocked = PETSC_FALSE;
1382         break;
1383       }
1384     }
1385     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1386       vbs = bs;
1387       for (i=0;i<n_R/vbs;i++) {
1388         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1389       }
1390     }
1391     ierr = PetscFree(vary);CHKERRQ(ierr);
1392   }
1393   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1394   if (sub_schurs->reuse_mumps) {
1395     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1396 
1397     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1398     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1399     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1400     reuse_mumps->is_R = pcbddc->is_R_local;
1401   } else {
1402     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1403   }
1404 
1405   /* print some info if requested */
1406   if (pcbddc->dbg_flag) {
1407     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1408     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1409     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1410     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1411     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1412     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1413     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1414   }
1415 
1416   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1417   if (!sub_schurs->reuse_mumps) {
1418     IS       is_aux1,is_aux2;
1419     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1420 
1421     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1422     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1423     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1424     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1425     for (i=0; i<n_D; i++) {
1426       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1427     }
1428     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1429     for (i=0, j=0; i<n_R; i++) {
1430       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1431         aux_array1[j++] = i;
1432       }
1433     }
1434     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1435     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1436     for (i=0, j=0; i<n_B; i++) {
1437       if (!PetscBTLookup(bitmask,is_indices[i])) {
1438         aux_array2[j++] = i;
1439       }
1440     }
1441     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1442     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1443     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1444     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1445     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1446 
1447     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1448       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1449       for (i=0, j=0; i<n_R; i++) {
1450         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1451           aux_array1[j++] = i;
1452         }
1453       }
1454       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1455       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1456       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1457     }
1458     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1459     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1460   } else {
1461     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1462     IS               tis;
1463     PetscInt         schur_size;
1464 
1465     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1466     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1467     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1468     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1469     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1470       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1471       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1472       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1473     }
1474   }
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1481 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1482 {
1483   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1484   PC_IS          *pcis = (PC_IS*)pc->data;
1485   PC             pc_temp;
1486   Mat            A_RR;
1487   MatReuse       reuse;
1488   PetscScalar    m_one = -1.0;
1489   PetscReal      value;
1490   PetscInt       n_D,n_R,ibs,mbs;
1491   PetscBool      use_exact,use_exact_reduced,issbaij;
1492   PetscErrorCode ierr;
1493   /* prefixes stuff */
1494   char           dir_prefix[256],neu_prefix[256],str_level[16];
1495   size_t         len;
1496 
1497   PetscFunctionBegin;
1498 
1499   /* compute prefixes */
1500   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1501   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1502   if (!pcbddc->current_level) {
1503     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1504     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1505     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1506     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1507   } else {
1508     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1509     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1510     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1511     len -= 15; /* remove "pc_bddc_coarse_" */
1512     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1513     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1514     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1515     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1516     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1517     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1518     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1519     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1520   }
1521 
1522   /* DIRICHLET PROBLEM */
1523   if (dirichlet) {
1524     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1525     if (pcbddc->local_mat->symmetric_set) {
1526       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1527     }
1528     /* Matrix for Dirichlet problem is pcis->A_II */
1529     n_D = pcis->n - pcis->n_B;
1530     if (!pcbddc->ksp_D) { /* create object if not yet build */
1531       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1532       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1533       /* default */
1534       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1535       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1536       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1537       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1538       if (issbaij) {
1539         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1540       } else {
1541         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1542       }
1543       /* Allow user's customization */
1544       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1545       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1546     }
1547     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1548     if (sub_schurs->reuse_mumps) {
1549       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1550 
1551       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1552     }
1553     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1554     if (!n_D) {
1555       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1556       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1557     }
1558     /* Set Up KSP for Dirichlet problem of BDDC */
1559     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1560     /* set ksp_D into pcis data */
1561     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1562     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1563     pcis->ksp_D = pcbddc->ksp_D;
1564   }
1565 
1566   /* NEUMANN PROBLEM */
1567   A_RR = 0;
1568   if (neumann) {
1569     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1570     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1571     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1572     if (pcbddc->ksp_R) { /* already created ksp */
1573       PetscInt nn_R;
1574       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1575       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1576       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1577       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1578         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1579         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1580         reuse = MAT_INITIAL_MATRIX;
1581       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1582         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1583           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1584           reuse = MAT_INITIAL_MATRIX;
1585         } else { /* safe to reuse the matrix */
1586           reuse = MAT_REUSE_MATRIX;
1587         }
1588       }
1589       /* last check */
1590       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1591         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1592         reuse = MAT_INITIAL_MATRIX;
1593       }
1594     } else { /* first time, so we need to create the matrix */
1595       reuse = MAT_INITIAL_MATRIX;
1596     }
1597     /* extract A_RR */
1598     if (!sub_schurs->reuse_mumps) {
1599       ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1600       ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1601       if (ibs != mbs) {
1602         Mat newmat;
1603         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1604         ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1605         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1606       } else {
1607         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1608       }
1609       if (pcbddc->local_mat->symmetric_set) {
1610         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1611       }
1612     } else {
1613       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1614 
1615       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1616       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1617       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1618     }
1619     if (!pcbddc->ksp_R) { /* create object if not present */
1620       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1621       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1622       /* default */
1623       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1624       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1625       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1626       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1627       if (issbaij) {
1628         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1629       } else {
1630         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1631       }
1632       /* Allow user's customization */
1633       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1634       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1635     }
1636     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1637     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1638     if (!n_R) {
1639       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1640       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1641     }
1642     /* Reuse MUMPS solver if it is present */
1643     if (sub_schurs->reuse_mumps) {
1644       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1645 
1646       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1647     }
1648     /* Set Up KSP for Neumann problem of BDDC */
1649     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1650   }
1651   /* free Neumann problem's matrix */
1652   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1653 
1654   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1655   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1656     if (pcbddc->dbg_flag) {
1657       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1658       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1659       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1660     }
1661     if (dirichlet) { /* Dirichlet */
1662       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1663       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1664       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1665       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1666       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1667       /* need to be adapted? */
1668       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1669       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1670       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1671       /* print info */
1672       if (pcbddc->dbg_flag) {
1673         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1674         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1675       }
1676       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1677         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1678       }
1679     }
1680     if (neumann) { /* Neumann */
1681       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1682       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1683       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1684       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1685       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1686       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1687       /* need to be adapted? */
1688       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1689       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1690       /* print info */
1691       if (pcbddc->dbg_flag) {
1692         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1693         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1694       }
1695       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1696         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1697       }
1698     }
1699   }
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 #undef __FUNCT__
1704 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1705 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1706 {
1707   PetscErrorCode  ierr;
1708   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1709   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1710 
1711   PetscFunctionBegin;
1712   if (!sub_schurs->reuse_mumps) {
1713     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1714   }
1715   if (!pcbddc->switch_static) {
1716     if (applytranspose && pcbddc->local_auxmat1) {
1717       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1718       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1719     }
1720     if (!sub_schurs->reuse_mumps) {
1721       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1722       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1723     } else {
1724       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1725 
1726       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1727       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1728     }
1729   } else {
1730     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1731     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1732     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1733     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1734     if (applytranspose && pcbddc->local_auxmat1) {
1735       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1736       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1737       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1738       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1739     }
1740   }
1741   if (!sub_schurs->reuse_mumps) {
1742     if (applytranspose) {
1743       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1744     } else {
1745       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1746     }
1747 #if defined(PETSC_HAVE_MUMPS)
1748   } else {
1749     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1750 
1751     if (applytranspose) {
1752       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1753     } else {
1754       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1755     }
1756 #endif
1757   }
1758   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1759   if (!pcbddc->switch_static) {
1760     if (!sub_schurs->reuse_mumps) {
1761       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1762       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1763     } else {
1764       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1765 
1766       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1767       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1768     }
1769     if (!applytranspose && pcbddc->local_auxmat1) {
1770       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1771       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1772     }
1773   } else {
1774     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1775     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1776     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1777     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1778     if (!applytranspose && pcbddc->local_auxmat1) {
1779       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1780       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1781     }
1782     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1783     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1784     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1785     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1786   }
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1791 #undef __FUNCT__
1792 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1793 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1794 {
1795   PetscErrorCode ierr;
1796   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1797   PC_IS*            pcis = (PC_IS*)  (pc->data);
1798   const PetscScalar zero = 0.0;
1799 
1800   PetscFunctionBegin;
1801   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1802   if (applytranspose) {
1803     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1804     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1805   } else {
1806     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1807     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1808   }
1809   /* start communications from local primal nodes to rhs of coarse solver */
1810   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1811   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1812   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1813 
1814   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1815   /* TODO remove null space when doing multilevel */
1816   if (pcbddc->coarse_ksp) {
1817     Vec rhs,sol;
1818 
1819     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1820     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1821     if (applytranspose) {
1822       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1823     } else {
1824       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1825     }
1826   }
1827 
1828   /* Local solution on R nodes */
1829   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1830     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1831   }
1832 
1833   /* communications from coarse sol to local primal nodes */
1834   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1835   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1836 
1837   /* Sum contributions from two levels */
1838   if (applytranspose) {
1839     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1840     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1841   } else {
1842     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1843     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1850 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1851 {
1852   PetscErrorCode ierr;
1853   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1854   PetscScalar    *array;
1855   Vec            from,to;
1856 
1857   PetscFunctionBegin;
1858   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1859     from = pcbddc->coarse_vec;
1860     to = pcbddc->vec1_P;
1861     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1862       Vec tvec;
1863 
1864       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1865       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1866       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1867       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1868       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1869       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1870     }
1871   } else { /* from local to global -> put data in coarse right hand side */
1872     from = pcbddc->vec1_P;
1873     to = pcbddc->coarse_vec;
1874   }
1875   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 #undef __FUNCT__
1880 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1881 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1882 {
1883   PetscErrorCode ierr;
1884   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1885   PetscScalar    *array;
1886   Vec            from,to;
1887 
1888   PetscFunctionBegin;
1889   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1890     from = pcbddc->coarse_vec;
1891     to = pcbddc->vec1_P;
1892   } else { /* from local to global -> put data in coarse right hand side */
1893     from = pcbddc->vec1_P;
1894     to = pcbddc->coarse_vec;
1895   }
1896   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1897   if (smode == SCATTER_FORWARD) {
1898     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1899       Vec tvec;
1900 
1901       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1902       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1903       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1904       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1905     }
1906   } else {
1907     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1908      ierr = VecResetArray(from);CHKERRQ(ierr);
1909     }
1910   }
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 /* uncomment for testing purposes */
1915 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1916 #undef __FUNCT__
1917 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1918 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1919 {
1920   PetscErrorCode    ierr;
1921   PC_IS*            pcis = (PC_IS*)(pc->data);
1922   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1923   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1924   /* one and zero */
1925   PetscScalar       one=1.0,zero=0.0;
1926   /* space to store constraints and their local indices */
1927   PetscScalar       *constraints_data;
1928   PetscInt          *constraints_idxs,*constraints_idxs_B;
1929   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1930   PetscInt          *constraints_n;
1931   /* iterators */
1932   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1933   /* BLAS integers */
1934   PetscBLASInt      lwork,lierr;
1935   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1936   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1937   /* reuse */
1938   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1939   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1940   /* change of basis */
1941   PetscBool         qr_needed;
1942   PetscBT           change_basis,qr_needed_idx;
1943   /* auxiliary stuff */
1944   PetscInt          *nnz,*is_indices;
1945   PetscInt          ncc;
1946   /* some quantities */
1947   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1948   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1949 
1950   PetscFunctionBegin;
1951   /* Destroy Mat objects computed previously */
1952   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1953   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1954   /* save info on constraints from previous setup (if any) */
1955   olocal_primal_size = pcbddc->local_primal_size;
1956   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1957   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
1958   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1959   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1960   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1961   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1962 
1963   /* print some info */
1964   if (pcbddc->dbg_flag) {
1965     IS       vertices;
1966     PetscInt nv,nedges,nfaces;
1967     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1968     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1969     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1970     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1971     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1972     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1973     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1974     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1975     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1976   }
1977 
1978   if (!pcbddc->adaptive_selection) {
1979     IS           ISForVertices,*ISForFaces,*ISForEdges;
1980     MatNullSpace nearnullsp;
1981     const Vec    *nearnullvecs;
1982     Vec          *localnearnullsp;
1983     PetscScalar  *array;
1984     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1985     PetscBool    nnsp_has_cnst;
1986     /* LAPACK working arrays for SVD or POD */
1987     PetscBool    skip_lapack,boolforchange;
1988     PetscScalar  *work;
1989     PetscReal    *singular_vals;
1990 #if defined(PETSC_USE_COMPLEX)
1991     PetscReal    *rwork;
1992 #endif
1993 #if defined(PETSC_MISSING_LAPACK_GESVD)
1994     PetscScalar  *temp_basis,*correlation_mat;
1995 #else
1996     PetscBLASInt dummy_int=1;
1997     PetscScalar  dummy_scalar=1.;
1998 #endif
1999 
2000     /* Get index sets for faces, edges and vertices from graph */
2001     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2002     /* free unneeded index sets */
2003     if (!pcbddc->use_vertices) {
2004       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2005     }
2006     if (!pcbddc->use_edges) {
2007       for (i=0;i<n_ISForEdges;i++) {
2008         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2009       }
2010       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2011       n_ISForEdges = 0;
2012     }
2013     if (!pcbddc->use_faces) {
2014       for (i=0;i<n_ISForFaces;i++) {
2015         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2016       }
2017       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2018       n_ISForFaces = 0;
2019     }
2020 
2021 #if defined(PETSC_USE_DEBUG)
2022     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2023        Also use_change_of_basis should be consistent among processors */
2024     if (pcbddc->NullSpace) {
2025       PetscBool tbool[2],gbool[2];
2026 
2027       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2028         pcbddc->use_change_of_basis = PETSC_TRUE;
2029         if (!ISForEdges) {
2030           pcbddc->use_change_on_faces = PETSC_TRUE;
2031         }
2032       }
2033       tbool[0] = pcbddc->use_change_of_basis;
2034       tbool[1] = pcbddc->use_change_on_faces;
2035       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2036       pcbddc->use_change_of_basis = gbool[0];
2037       pcbddc->use_change_on_faces = gbool[1];
2038     }
2039 #endif
2040 
2041     /* check if near null space is attached to global mat */
2042     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2043     if (nearnullsp) {
2044       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2045       /* remove any stored info */
2046       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2047       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2048       /* store information for BDDC solver reuse */
2049       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2050       pcbddc->onearnullspace = nearnullsp;
2051       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2052       for (i=0;i<nnsp_size;i++) {
2053         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2054       }
2055     } else { /* if near null space is not provided BDDC uses constants by default */
2056       nnsp_size = 0;
2057       nnsp_has_cnst = PETSC_TRUE;
2058     }
2059     /* get max number of constraints on a single cc */
2060     max_constraints = nnsp_size;
2061     if (nnsp_has_cnst) max_constraints++;
2062 
2063     /*
2064          Evaluate maximum storage size needed by the procedure
2065          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2066          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2067          There can be multiple constraints per connected component
2068                                                                                                                                                            */
2069     n_vertices = 0;
2070     if (ISForVertices) {
2071       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2072     }
2073     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2074     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2075 
2076     total_counts = n_ISForFaces+n_ISForEdges;
2077     total_counts *= max_constraints;
2078     total_counts += n_vertices;
2079     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2080 
2081     total_counts = 0;
2082     max_size_of_constraint = 0;
2083     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2084       IS used_is;
2085       if (i<n_ISForEdges) {
2086         used_is = ISForEdges[i];
2087       } else {
2088         used_is = ISForFaces[i-n_ISForEdges];
2089       }
2090       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2091       total_counts += j;
2092       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2093     }
2094     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2095 
2096     /* get local part of global near null space vectors */
2097     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2098     for (k=0;k<nnsp_size;k++) {
2099       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2100       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2101       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2102     }
2103 
2104     /* whether or not to skip lapack calls */
2105     skip_lapack = PETSC_TRUE;
2106     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2107 
2108     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2109     if (!skip_lapack) {
2110       PetscScalar temp_work;
2111 
2112 #if defined(PETSC_MISSING_LAPACK_GESVD)
2113       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2114       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2115       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2116       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2117 #if defined(PETSC_USE_COMPLEX)
2118       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2119 #endif
2120       /* now we evaluate the optimal workspace using query with lwork=-1 */
2121       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2122       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2123       lwork = -1;
2124       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2125 #if !defined(PETSC_USE_COMPLEX)
2126       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2127 #else
2128       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2129 #endif
2130       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2131       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2132 #else /* on missing GESVD */
2133       /* SVD */
2134       PetscInt max_n,min_n;
2135       max_n = max_size_of_constraint;
2136       min_n = max_constraints;
2137       if (max_size_of_constraint < max_constraints) {
2138         min_n = max_size_of_constraint;
2139         max_n = max_constraints;
2140       }
2141       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2142 #if defined(PETSC_USE_COMPLEX)
2143       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2144 #endif
2145       /* now we evaluate the optimal workspace using query with lwork=-1 */
2146       lwork = -1;
2147       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2148       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2149       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2150       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2151 #if !defined(PETSC_USE_COMPLEX)
2152       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2153 #else
2154       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2155 #endif
2156       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2157       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2158 #endif /* on missing GESVD */
2159       /* Allocate optimal workspace */
2160       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2161       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2162     }
2163     /* Now we can loop on constraining sets */
2164     total_counts = 0;
2165     constraints_idxs_ptr[0] = 0;
2166     constraints_data_ptr[0] = 0;
2167     /* vertices */
2168     if (n_vertices) {
2169       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2170       if (nnsp_has_cnst) { /* it considers all possible vertices */
2171         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2172         for (i=0;i<n_vertices;i++) {
2173           constraints_n[total_counts] = 1;
2174           constraints_data[total_counts] = 1.0;
2175           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2176           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2177           total_counts++;
2178         }
2179       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2180         PetscBool used_vertex;
2181         for (i=0;i<n_vertices;i++) {
2182           used_vertex = PETSC_FALSE;
2183           k = 0;
2184           while (!used_vertex && k<nnsp_size) {
2185             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2186             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2187               constraints_n[total_counts] = 1;
2188               constraints_idxs[total_counts] = is_indices[i];
2189               constraints_data[total_counts] = 1.0;
2190               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2191               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2192               total_counts++;
2193               used_vertex = PETSC_TRUE;
2194             }
2195             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2196             k++;
2197           }
2198         }
2199       }
2200       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2201       n_vertices = total_counts;
2202     }
2203 
2204     /* edges and faces */
2205     total_counts_cc = total_counts;
2206     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2207       IS        used_is;
2208       PetscBool idxs_copied = PETSC_FALSE;
2209 
2210       if (ncc<n_ISForEdges) {
2211         used_is = ISForEdges[ncc];
2212         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2213       } else {
2214         used_is = ISForFaces[ncc-n_ISForEdges];
2215         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2216       }
2217       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2218 
2219       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2220       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2221       /* change of basis should not be performed on local periodic nodes */
2222       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2223       if (nnsp_has_cnst) {
2224         PetscScalar quad_value;
2225 
2226         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2227         idxs_copied = PETSC_TRUE;
2228 
2229         if (!pcbddc->use_nnsp_true) {
2230           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2231         } else {
2232           quad_value = 1.0;
2233         }
2234         for (j=0;j<size_of_constraint;j++) {
2235           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2236         }
2237         temp_constraints++;
2238         total_counts++;
2239       }
2240       for (k=0;k<nnsp_size;k++) {
2241         PetscReal real_value;
2242         PetscScalar *ptr_to_data;
2243 
2244         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2245         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2246         for (j=0;j<size_of_constraint;j++) {
2247           ptr_to_data[j] = array[is_indices[j]];
2248         }
2249         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2250         /* check if array is null on the connected component */
2251         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2252         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2253         if (real_value > 0.0) { /* keep indices and values */
2254           temp_constraints++;
2255           total_counts++;
2256           if (!idxs_copied) {
2257             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2258             idxs_copied = PETSC_TRUE;
2259           }
2260         }
2261       }
2262       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2263       valid_constraints = temp_constraints;
2264       if (!pcbddc->use_nnsp_true && temp_constraints) {
2265         if (temp_constraints == 1) { /* just normalize the constraint */
2266           PetscScalar norm,*ptr_to_data;
2267 
2268           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2269           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2270           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2271           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2272           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2273         } else { /* perform SVD */
2274           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2275           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2276 
2277 #if defined(PETSC_MISSING_LAPACK_GESVD)
2278           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2279              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2280              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2281                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2282                 from that computed using LAPACKgesvd
2283              -> This is due to a different computation of eigenvectors in LAPACKheev
2284              -> The quality of the POD-computed basis will be the same */
2285           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2286           /* Store upper triangular part of correlation matrix */
2287           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2288           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2289           for (j=0;j<temp_constraints;j++) {
2290             for (k=0;k<j+1;k++) {
2291               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2292             }
2293           }
2294           /* compute eigenvalues and eigenvectors of correlation matrix */
2295           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2296           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2297 #if !defined(PETSC_USE_COMPLEX)
2298           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2299 #else
2300           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2301 #endif
2302           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2303           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2304           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2305           j = 0;
2306           while (j < temp_constraints && singular_vals[j] < tol) j++;
2307           total_counts = total_counts-j;
2308           valid_constraints = temp_constraints-j;
2309           /* scale and copy POD basis into used quadrature memory */
2310           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2311           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2312           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2313           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2314           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2315           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2316           if (j<temp_constraints) {
2317             PetscInt ii;
2318             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2319             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2320             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2321             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2322             for (k=0;k<temp_constraints-j;k++) {
2323               for (ii=0;ii<size_of_constraint;ii++) {
2324                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2325               }
2326             }
2327           }
2328 #else  /* on missing GESVD */
2329           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2330           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2331           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2332           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2333 #if !defined(PETSC_USE_COMPLEX)
2334           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2335 #else
2336           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2337 #endif
2338           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2339           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2340           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2341           k = temp_constraints;
2342           if (k > size_of_constraint) k = size_of_constraint;
2343           j = 0;
2344           while (j < k && singular_vals[k-j-1] < tol) j++;
2345           valid_constraints = k-j;
2346           total_counts = total_counts-temp_constraints+valid_constraints;
2347 #endif /* on missing GESVD */
2348         }
2349       }
2350       /* update pointers information */
2351       if (valid_constraints) {
2352         constraints_n[total_counts_cc] = valid_constraints;
2353         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2354         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2355         /* set change_of_basis flag */
2356         if (boolforchange) {
2357           PetscBTSet(change_basis,total_counts_cc);
2358         }
2359         total_counts_cc++;
2360       }
2361     }
2362     /* free workspace */
2363     if (!skip_lapack) {
2364       ierr = PetscFree(work);CHKERRQ(ierr);
2365 #if defined(PETSC_USE_COMPLEX)
2366       ierr = PetscFree(rwork);CHKERRQ(ierr);
2367 #endif
2368       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2369 #if defined(PETSC_MISSING_LAPACK_GESVD)
2370       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2371       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2372 #endif
2373     }
2374     for (k=0;k<nnsp_size;k++) {
2375       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2376     }
2377     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2378     /* free index sets of faces, edges and vertices */
2379     for (i=0;i<n_ISForFaces;i++) {
2380       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2381     }
2382     if (n_ISForFaces) {
2383       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2384     }
2385     for (i=0;i<n_ISForEdges;i++) {
2386       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2387     }
2388     if (n_ISForEdges) {
2389       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2390     }
2391     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2392   } else {
2393     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2394 
2395     total_counts = 0;
2396     n_vertices = 0;
2397     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2398       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2399     }
2400     max_constraints = 0;
2401     total_counts_cc = 0;
2402     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2403       total_counts += pcbddc->adaptive_constraints_n[i];
2404       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2405       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2406     }
2407     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2408     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2409     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2410     constraints_data = pcbddc->adaptive_constraints_data;
2411     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2412     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2413     total_counts_cc = 0;
2414     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2415       if (pcbddc->adaptive_constraints_n[i]) {
2416         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2417       }
2418     }
2419 #if 0
2420     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2421     for (i=0;i<total_counts_cc;i++) {
2422       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2423       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2424       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2425         printf(" %d",constraints_idxs[j]);
2426       }
2427       printf("\n");
2428       printf("number of cc: %d\n",constraints_n[i]);
2429     }
2430     for (i=0;i<n_vertices;i++) {
2431       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2432     }
2433     for (i=0;i<sub_schurs->n_subs;i++) {
2434       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2435     }
2436 #endif
2437 
2438     max_size_of_constraint = 0;
2439     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2440     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2441     /* Change of basis */
2442     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2443     if (pcbddc->use_change_of_basis) {
2444       for (i=0;i<sub_schurs->n_subs;i++) {
2445         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2446           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2447         }
2448       }
2449     }
2450   }
2451   pcbddc->local_primal_size = total_counts;
2452   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2453 
2454   /* map constraints_idxs in boundary numbering */
2455   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2456   if (i != constraints_idxs_ptr[total_counts_cc]) {
2457     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2458   }
2459 
2460   /* Create constraint matrix */
2461   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2462   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2463   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2464 
2465   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2466   /* determine if a QR strategy is needed for change of basis */
2467   qr_needed = PETSC_FALSE;
2468   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2469   total_primal_vertices=0;
2470   pcbddc->local_primal_size_cc = 0;
2471   for (i=0;i<total_counts_cc;i++) {
2472     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2473     if (size_of_constraint == 1) {
2474       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2475       pcbddc->local_primal_size_cc += 1;
2476     } else if (PetscBTLookup(change_basis,i)) {
2477       for (k=0;k<constraints_n[i];k++) {
2478         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2479       }
2480       pcbddc->local_primal_size_cc += constraints_n[i];
2481       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2482         PetscBTSet(qr_needed_idx,i);
2483         qr_needed = PETSC_TRUE;
2484       }
2485     } else {
2486       pcbddc->local_primal_size_cc += 1;
2487     }
2488   }
2489   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2490   pcbddc->n_vertices = total_primal_vertices;
2491   /* permute indices in order to have a sorted set of vertices */
2492   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2493 
2494   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2495   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2496   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2497 
2498   /* nonzero structure of constraint matrix */
2499   /* and get reference dof for local constraints */
2500   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2501   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2502 
2503   j = total_primal_vertices;
2504   total_counts = total_primal_vertices;
2505   cum = total_primal_vertices;
2506   for (i=n_vertices;i<total_counts_cc;i++) {
2507     if (!PetscBTLookup(change_basis,i)) {
2508       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2509       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2510       cum++;
2511       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2512       for (k=0;k<constraints_n[i];k++) {
2513         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2514         nnz[j+k] = size_of_constraint;
2515       }
2516       j += constraints_n[i];
2517     }
2518   }
2519   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2520   ierr = PetscFree(nnz);CHKERRQ(ierr);
2521 
2522   /* set values in constraint matrix */
2523   for (i=0;i<total_primal_vertices;i++) {
2524     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2525   }
2526   total_counts = total_primal_vertices;
2527   for (i=n_vertices;i<total_counts_cc;i++) {
2528     if (!PetscBTLookup(change_basis,i)) {
2529       PetscInt *cols;
2530 
2531       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2532       cols = constraints_idxs+constraints_idxs_ptr[i];
2533       for (k=0;k<constraints_n[i];k++) {
2534         PetscInt    row = total_counts+k;
2535         PetscScalar *vals;
2536 
2537         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2538         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2539       }
2540       total_counts += constraints_n[i];
2541     }
2542   }
2543   /* assembling */
2544   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2545   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2546 
2547   /*
2548   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2549   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2550   */
2551   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2552   if (pcbddc->use_change_of_basis) {
2553     /* dual and primal dofs on a single cc */
2554     PetscInt     dual_dofs,primal_dofs;
2555     /* working stuff for GEQRF */
2556     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2557     PetscBLASInt lqr_work;
2558     /* working stuff for UNGQR */
2559     PetscScalar  *gqr_work,lgqr_work_t;
2560     PetscBLASInt lgqr_work;
2561     /* working stuff for TRTRS */
2562     PetscScalar  *trs_rhs;
2563     PetscBLASInt Blas_NRHS;
2564     /* pointers for values insertion into change of basis matrix */
2565     PetscInt     *start_rows,*start_cols;
2566     PetscScalar  *start_vals;
2567     /* working stuff for values insertion */
2568     PetscBT      is_primal;
2569     PetscInt     *aux_primal_numbering_B;
2570     /* matrix sizes */
2571     PetscInt     global_size,local_size;
2572     /* temporary change of basis */
2573     Mat          localChangeOfBasisMatrix;
2574     /* extra space for debugging */
2575     PetscScalar  *dbg_work;
2576 
2577     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2578     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2579     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2580     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2581     /* nonzeros for local mat */
2582     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2583     for (i=0;i<pcis->n;i++) nnz[i]=1;
2584     for (i=n_vertices;i<total_counts_cc;i++) {
2585       if (PetscBTLookup(change_basis,i)) {
2586         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2587         if (PetscBTLookup(qr_needed_idx,i)) {
2588           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2589         } else {
2590           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2591           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2592         }
2593       }
2594     }
2595     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2596     ierr = PetscFree(nnz);CHKERRQ(ierr);
2597     /* Set initial identity in the matrix */
2598     for (i=0;i<pcis->n;i++) {
2599       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2600     }
2601 
2602     if (pcbddc->dbg_flag) {
2603       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2604       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2605     }
2606 
2607 
2608     /* Now we loop on the constraints which need a change of basis */
2609     /*
2610        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2611        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2612 
2613        Basic blocks of change of basis matrix T computed by
2614 
2615           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2616 
2617             | 1        0   ...        0         s_1/S |
2618             | 0        1   ...        0         s_2/S |
2619             |              ...                        |
2620             | 0        ...            1     s_{n-1}/S |
2621             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2622 
2623             with S = \sum_{i=1}^n s_i^2
2624             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2625                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2626 
2627           - QR decomposition of constraints otherwise
2628     */
2629     if (qr_needed) {
2630       /* space to store Q */
2631       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2632       /* first we issue queries for optimal work */
2633       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2634       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2635       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2636       lqr_work = -1;
2637       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2638       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2639       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2640       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2641       lgqr_work = -1;
2642       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2643       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2644       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2645       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2646       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2647       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2648       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2649       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2650       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2651       /* array to store scaling factors for reflectors */
2652       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2653       /* array to store rhs and solution of triangular solver */
2654       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2655       /* allocating workspace for check */
2656       if (pcbddc->dbg_flag) {
2657         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2658       }
2659     }
2660     /* array to store whether a node is primal or not */
2661     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2662     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2663     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2664     if (i != total_primal_vertices) {
2665       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2666     }
2667     for (i=0;i<total_primal_vertices;i++) {
2668       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2669     }
2670     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2671 
2672     /* loop on constraints and see whether or not they need a change of basis and compute it */
2673     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2674       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2675       if (PetscBTLookup(change_basis,total_counts)) {
2676         /* get constraint info */
2677         primal_dofs = constraints_n[total_counts];
2678         dual_dofs = size_of_constraint-primal_dofs;
2679 
2680         if (pcbddc->dbg_flag) {
2681           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2682         }
2683 
2684         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2685 
2686           /* copy quadrature constraints for change of basis check */
2687           if (pcbddc->dbg_flag) {
2688             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2689           }
2690           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2691           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2692 
2693           /* compute QR decomposition of constraints */
2694           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2695           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2696           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2697           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2698           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2699           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2700           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2701 
2702           /* explictly compute R^-T */
2703           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2704           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2705           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2706           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2707           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2708           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2709           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2710           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2711           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2712           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2713 
2714           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2715           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2716           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2717           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2718           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2719           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2720           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2721           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2722           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2723 
2724           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2725              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2726              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2727           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2728           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2729           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2730           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2731           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2732           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2733           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2734           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2735           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2736           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2737 
2738           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2739           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2740           /* insert cols for primal dofs */
2741           for (j=0;j<primal_dofs;j++) {
2742             start_vals = &qr_basis[j*size_of_constraint];
2743             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2744             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2745           }
2746           /* insert cols for dual dofs */
2747           for (j=0,k=0;j<dual_dofs;k++) {
2748             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2749               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2750               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2751               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2752               j++;
2753             }
2754           }
2755 
2756           /* check change of basis */
2757           if (pcbddc->dbg_flag) {
2758             PetscInt   ii,jj;
2759             PetscBool valid_qr=PETSC_TRUE;
2760             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2761             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2762             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2763             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2764             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2765             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2766             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2767             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2768             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2769             for (jj=0;jj<size_of_constraint;jj++) {
2770               for (ii=0;ii<primal_dofs;ii++) {
2771                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2772                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2773               }
2774             }
2775             if (!valid_qr) {
2776               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2777               for (jj=0;jj<size_of_constraint;jj++) {
2778                 for (ii=0;ii<primal_dofs;ii++) {
2779                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2780                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2781                   }
2782                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2783                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2784                   }
2785                 }
2786               }
2787             } else {
2788               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2789             }
2790           }
2791         } else { /* simple transformation block */
2792           PetscInt    row,col;
2793           PetscScalar val,norm;
2794 
2795           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2796           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2797           for (j=0;j<size_of_constraint;j++) {
2798             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2799             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2800             if (!PetscBTLookup(is_primal,row_B)) {
2801               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2802               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2803               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2804             } else {
2805               for (k=0;k<size_of_constraint;k++) {
2806                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2807                 if (row != col) {
2808                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2809                 } else {
2810                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2811                 }
2812                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2813               }
2814             }
2815           }
2816           if (pcbddc->dbg_flag) {
2817             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2818           }
2819         }
2820       } else {
2821         if (pcbddc->dbg_flag) {
2822           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2823         }
2824       }
2825     }
2826 
2827     /* free workspace */
2828     if (qr_needed) {
2829       if (pcbddc->dbg_flag) {
2830         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2831       }
2832       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2833       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2834       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2835       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2836       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2837     }
2838     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2839     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2840     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2841 
2842     /* assembling of global change of variable */
2843     {
2844       Mat      tmat;
2845       PetscInt bs;
2846 
2847       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2848       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2849       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2850       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2851       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2852       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2853       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2854       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2855       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2856       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2857       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2858       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2859       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2860       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2861       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2862       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2863       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2864       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2865     }
2866     /* check */
2867     if (pcbddc->dbg_flag) {
2868       PetscReal error;
2869       Vec       x,x_change;
2870 
2871       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2872       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2873       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2874       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2875       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2876       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2877       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2878       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2879       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2880       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2881       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2882       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2883       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2884       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2885       ierr = VecDestroy(&x);CHKERRQ(ierr);
2886       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2887     }
2888 
2889     /* adapt sub_schurs computed (if any) */
2890     if (pcbddc->use_deluxe_scaling) {
2891       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2892       if (sub_schurs->S_Ej_all) {
2893         Mat S_new,tmat;
2894         IS is_all_N;
2895 
2896         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2897         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2898         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2899         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2900         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2901         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2902         sub_schurs->S_Ej_all = S_new;
2903         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2904         if (sub_schurs->sum_S_Ej_all) {
2905           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2906           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2907           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2908           sub_schurs->sum_S_Ej_all = S_new;
2909           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2910         }
2911         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2912       }
2913     }
2914     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2915   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2916     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2917     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2918   }
2919 
2920   /* set up change of basis context */
2921   if (pcbddc->ChangeOfBasisMatrix) {
2922     PCBDDCChange_ctx change_ctx;
2923 
2924     if (!pcbddc->new_global_mat) {
2925       PetscInt global_size,local_size;
2926 
2927       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2928       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2929       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2930       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2931       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2932       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2933       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2934       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2935       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2936     } else {
2937       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2938       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2939       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2940     }
2941     if (!pcbddc->user_ChangeOfBasisMatrix) {
2942       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2943       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2944     } else {
2945       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2946       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2947     }
2948     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2949     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2950     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2951     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2952   }
2953 
2954   /* check if a new primal space has been introduced */
2955   pcbddc->new_primal_space_local = PETSC_TRUE;
2956   if (olocal_primal_size == pcbddc->local_primal_size) {
2957     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2958     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2959     if (!pcbddc->new_primal_space_local) {
2960       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2961       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2962     }
2963   }
2964   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
2965   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2966   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2967 
2968   /* flush dbg viewer */
2969   if (pcbddc->dbg_flag) {
2970     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2971   }
2972 
2973   /* free workspace */
2974   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2975   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2976   if (!pcbddc->adaptive_selection) {
2977     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2978     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2979   } else {
2980     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2981                       pcbddc->adaptive_constraints_idxs_ptr,
2982                       pcbddc->adaptive_constraints_data_ptr,
2983                       pcbddc->adaptive_constraints_idxs,
2984                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2985     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
2986     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
2987   }
2988   PetscFunctionReturn(0);
2989 }
2990 
2991 #undef __FUNCT__
2992 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2993 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2994 {
2995   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2996   PC_IS       *pcis = (PC_IS*)pc->data;
2997   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2998   PetscInt    ierr,i,vertex_size,N;
2999   PetscViewer viewer=pcbddc->dbg_viewer;
3000 
3001   PetscFunctionBegin;
3002   /* Reset previously computed graph */
3003   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3004   /* Init local Graph struct */
3005   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3006   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
3007 
3008   /* Check validity of the csr graph passed in by the user */
3009   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3010     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3011   }
3012 
3013   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3014   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3015     PetscInt  *xadj,*adjncy;
3016     PetscInt  nvtxs;
3017     PetscBool flg_row=PETSC_FALSE;
3018 
3019     if (pcbddc->use_local_adj) {
3020 
3021       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3022       if (flg_row) {
3023         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3024         pcbddc->computed_rowadj = PETSC_TRUE;
3025       }
3026       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3027     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3028       IS                     is_dummy;
3029       ISLocalToGlobalMapping l2gmap_dummy;
3030       PetscInt               j,sum;
3031       PetscInt               *cxadj,*cadjncy;
3032       const PetscInt         *idxs;
3033       PCBDDCGraph            graph;
3034       PetscBT                is_on_boundary;
3035 
3036       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3037       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3038       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3039       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3040       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3041       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3042       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3043       if (flg_row) {
3044         graph->xadj = xadj;
3045         graph->adjncy = adjncy;
3046       }
3047       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3048       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3049       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3050 
3051       if (pcbddc->dbg_flag) {
3052         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3053         for (i=0;i<graph->ncc;i++) {
3054           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3055         }
3056       }
3057 
3058       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3059       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3060       for (i=0;i<pcis->n_B;i++) {
3061         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3062       }
3063       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3064 
3065       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3066       sum = 0;
3067       for (i=0;i<graph->ncc;i++) {
3068         PetscInt sizecc = 0;
3069         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3070           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3071             sizecc++;
3072           }
3073         }
3074         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3075           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3076             cxadj[graph->queue[j]] = sizecc;
3077           }
3078         }
3079         sum += sizecc*sizecc;
3080       }
3081       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3082       sum = 0;
3083       for (i=0;i<pcis->n;i++) {
3084         PetscInt temp = cxadj[i];
3085         cxadj[i] = sum;
3086         sum += temp;
3087       }
3088       cxadj[pcis->n] = sum;
3089       for (i=0;i<graph->ncc;i++) {
3090         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3091           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3092             PetscInt k,sizecc = 0;
3093             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3094               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3095                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3096                 sizecc++;
3097               }
3098             }
3099           }
3100         }
3101       }
3102       if (sum) {
3103         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3104       } else {
3105         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3106         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3107       }
3108       graph->xadj = 0;
3109       graph->adjncy = 0;
3110       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3111       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3112     }
3113   }
3114   if (pcbddc->dbg_flag) {
3115     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3116   }
3117 
3118   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3119   vertex_size = 1;
3120   if (pcbddc->user_provided_isfordofs) {
3121     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3122       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3123       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3124         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3125         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3126       }
3127       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3128       pcbddc->n_ISForDofs = 0;
3129       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3130     }
3131     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3132     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3133   } else {
3134     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3135       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3136       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3137       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3138         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3139       }
3140     }
3141   }
3142 
3143   /* Setup of Graph */
3144   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3145     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3146   }
3147   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3148     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3149   }
3150   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3151 
3152   /* Graph's connected components analysis */
3153   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3154 
3155   /* print some info to stdout */
3156   if (pcbddc->dbg_flag) {
3157     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3158   }
3159 
3160   /* mark topography has done */
3161   pcbddc->recompute_topography = PETSC_FALSE;
3162   PetscFunctionReturn(0);
3163 }
3164 
3165 #undef __FUNCT__
3166 #define __FUNCT__ "PCBDDCSubsetNumbering"
3167 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3168 {
3169   Vec            local_vec,global_vec;
3170   IS             seqis,paris;
3171   VecScatter     scatter_ctx;
3172   PetscScalar    *array;
3173   PetscInt       *temp_global_dofs;
3174   PetscScalar    globalsum;
3175   PetscInt       i,j,s;
3176   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3177   PetscMPIInt    rank_prec_comm,size_prec_comm;
3178   PetscInt       *dof_sizes,*dof_displs;
3179   PetscBool      first_found;
3180   PetscErrorCode ierr;
3181 
3182   PetscFunctionBegin;
3183   /* mpi buffers */
3184   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3185   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3186   j = ( !rank_prec_comm ? size_prec_comm : 0);
3187   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3188   /* get maximum size of subset */
3189   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3190   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3191   max_local = 0;
3192   for (i=0;i<n_local_dofs;i++) {
3193     if (max_local < temp_global_dofs[i] ) {
3194       max_local = temp_global_dofs[i];
3195     }
3196   }
3197   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3198   max_global++;
3199   max_local = 0;
3200   for (i=0;i<n_local_dofs;i++) {
3201     if (max_local < local_dofs[i] ) {
3202       max_local = local_dofs[i];
3203     }
3204   }
3205   max_local++;
3206   /* allocate workspace */
3207   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3208   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3209   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3210   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3211   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3212   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3213   /* create scatter */
3214   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3215   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3216   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3217   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3218   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3219   /* init array */
3220   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3221   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3222   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3223   if (local_dofs_mult) {
3224     for (i=0;i<n_local_dofs;i++) {
3225       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3226     }
3227   } else {
3228     for (i=0;i<n_local_dofs;i++) {
3229       array[local_dofs[i]]=1.0;
3230     }
3231   }
3232   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3233   /* scatter into global vec and get total number of global dofs */
3234   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3235   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3236   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3237   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3238   /* Fill global_vec with cumulative function for global numbering */
3239   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3240   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3241   nlocals = 0;
3242   first_index = -1;
3243   first_found = PETSC_FALSE;
3244   for (i=0;i<s;i++) {
3245     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3246       first_found = PETSC_TRUE;
3247       first_index = i;
3248     }
3249     nlocals += (PetscInt)PetscRealPart(array[i]);
3250   }
3251   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3252   if (!rank_prec_comm) {
3253     dof_displs[0]=0;
3254     for (i=1;i<size_prec_comm;i++) {
3255       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3256     }
3257   }
3258   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3259   if (first_found) {
3260     array[first_index] += (PetscScalar)nlocals;
3261     old_index = first_index;
3262     for (i=first_index+1;i<s;i++) {
3263       if (PetscRealPart(array[i]) > 0.1) {
3264         array[i] += array[old_index];
3265         old_index = i;
3266       }
3267     }
3268   }
3269   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3270   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3271   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3272   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3273   /* get global ordering of local dofs */
3274   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3275   if (local_dofs_mult) {
3276     for (i=0;i<n_local_dofs;i++) {
3277       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3278     }
3279   } else {
3280     for (i=0;i<n_local_dofs;i++) {
3281       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3282     }
3283   }
3284   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3285   /* free workspace */
3286   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3287   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3288   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3289   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3290   /* return pointer to global ordering of local dofs */
3291   *global_numbering_subset = temp_global_dofs;
3292   PetscFunctionReturn(0);
3293 }
3294 
3295 #undef __FUNCT__
3296 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3297 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3298 {
3299   PetscInt       i,j;
3300   PetscScalar    *alphas;
3301   PetscErrorCode ierr;
3302 
3303   PetscFunctionBegin;
3304   /* this implements stabilized Gram-Schmidt */
3305   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3306   for (i=0;i<n;i++) {
3307     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3308     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3309     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3310   }
3311   ierr = PetscFree(alphas);CHKERRQ(ierr);
3312   PetscFunctionReturn(0);
3313 }
3314 
3315 #undef __FUNCT__
3316 #define __FUNCT__ "MatISGetSubassemblingPattern"
3317 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3318 {
3319   Mat             subdomain_adj;
3320   IS              new_ranks,ranks_send_to;
3321   MatPartitioning partitioner;
3322   Mat_IS          *matis;
3323   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3324   PetscInt        prank;
3325   PetscMPIInt     size,rank,color;
3326   PetscInt        *xadj,*adjncy,*oldranks;
3327   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3328   PetscInt        i,local_size,threshold=0;
3329   PetscErrorCode  ierr;
3330   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3331   PetscSubcomm    subcomm;
3332 
3333   PetscFunctionBegin;
3334   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3335   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3336   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3337 
3338   /* Get info on mapping */
3339   matis = (Mat_IS*)(mat->data);
3340   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3341   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3342 
3343   /* build local CSR graph of subdomains' connectivity */
3344   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3345   xadj[0] = 0;
3346   xadj[1] = PetscMax(n_neighs-1,0);
3347   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3348   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3349 
3350   if (threshold) {
3351     PetscInt xadj_count = 0;
3352     for (i=1;i<n_neighs;i++) {
3353       if (n_shared[i] > threshold) {
3354         adjncy[xadj_count] = neighs[i];
3355         adjncy_wgt[xadj_count] = n_shared[i];
3356         xadj_count++;
3357       }
3358     }
3359     xadj[1] = xadj_count;
3360   } else {
3361     if (xadj[1]) {
3362       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3363       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3364     }
3365   }
3366   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3367   if (use_square) {
3368     for (i=0;i<xadj[1];i++) {
3369       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3370     }
3371   }
3372   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3373 
3374   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3375 
3376   /*
3377     Restrict work on active processes only.
3378   */
3379   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3380   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3381   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3382   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3383   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3384   if (color) {
3385     ierr = PetscFree(xadj);CHKERRQ(ierr);
3386     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3387     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3388   } else {
3389     PetscInt coarsening_ratio;
3390     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3391     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3392     prank = rank;
3393     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3394     /*
3395     for (i=0;i<size;i++) {
3396       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3397     }
3398     */
3399     for (i=0;i<xadj[1];i++) {
3400       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3401     }
3402     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3403     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3404     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3405 
3406     /* Partition */
3407     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3408     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3409     if (use_vwgt) {
3410       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3411       v_wgt[0] = local_size;
3412       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3413     }
3414     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3415     coarsening_ratio = size/n_subdomains;
3416     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3417     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3418     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3419     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3420 
3421     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3422     if (contiguous) {
3423       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3424     } else {
3425       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3426     }
3427     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3428     /* clean up */
3429     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3430     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3431     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3432     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3433   }
3434   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3435 
3436   /* assemble parallel IS for sends */
3437   i = 1;
3438   if (color) i=0;
3439   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3440 
3441   /* get back IS */
3442   *is_sends = ranks_send_to;
3443   PetscFunctionReturn(0);
3444 }
3445 
3446 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3447 
3448 #undef __FUNCT__
3449 #define __FUNCT__ "MatISSubassemble"
3450 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3451 {
3452   Mat                    local_mat;
3453   Mat_IS                 *matis;
3454   IS                     is_sends_internal;
3455   PetscInt               rows,cols,new_local_rows;
3456   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3457   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3458   ISLocalToGlobalMapping l2gmap;
3459   PetscInt*              l2gmap_indices;
3460   const PetscInt*        is_indices;
3461   MatType                new_local_type;
3462   /* buffers */
3463   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3464   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3465   PetscInt               *recv_buffer_idxs_local;
3466   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3467   /* MPI */
3468   MPI_Comm               comm,comm_n;
3469   PetscSubcomm           subcomm;
3470   PetscMPIInt            n_sends,n_recvs,commsize;
3471   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3472   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3473   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3474   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3475   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3476   PetscErrorCode         ierr;
3477 
3478   PetscFunctionBegin;
3479   /* TODO: add missing checks */
3480   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3481   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3482   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3483   PetscValidLogicalCollectiveInt(mat,nis,7);
3484   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3485   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3486   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3487   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3488   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3489   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3490   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3491   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3492     PetscInt mrows,mcols,mnrows,mncols;
3493     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3494     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3495     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3496     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3497     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3498     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3499   }
3500   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3501   PetscValidLogicalCollectiveInt(mat,bs,0);
3502   /* prepare IS for sending if not provided */
3503   if (!is_sends) {
3504     PetscBool pcontig = PETSC_TRUE;
3505     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3506     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3507   } else {
3508     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3509     is_sends_internal = is_sends;
3510   }
3511 
3512   /* get pointer of MATIS data */
3513   matis = (Mat_IS*)mat->data;
3514 
3515   /* get comm */
3516   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3517 
3518   /* compute number of sends */
3519   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3520   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3521 
3522   /* compute number of receives */
3523   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3524   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3525   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3526   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3527   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3528   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3529   ierr = PetscFree(iflags);CHKERRQ(ierr);
3530 
3531   /* restrict comm if requested */
3532   subcomm = 0;
3533   destroy_mat = PETSC_FALSE;
3534   if (restrict_comm) {
3535     PetscMPIInt color,subcommsize;
3536 
3537     color = 0;
3538     if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */
3539     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3540     subcommsize = commsize - subcommsize;
3541     /* check if reuse has been requested */
3542     if (reuse == MAT_REUSE_MATRIX) {
3543       if (*mat_n) {
3544         PetscMPIInt subcommsize2;
3545         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3546         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3547         comm_n = PetscObjectComm((PetscObject)*mat_n);
3548       } else {
3549         comm_n = PETSC_COMM_SELF;
3550       }
3551     } else { /* MAT_INITIAL_MATRIX */
3552       PetscMPIInt rank;
3553 
3554       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3555       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3556       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3557       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3558       comm_n = PetscSubcommChild(subcomm);
3559     }
3560     /* flag to destroy *mat_n if not significative */
3561     if (color) destroy_mat = PETSC_TRUE;
3562   } else {
3563     comm_n = comm;
3564   }
3565 
3566   /* prepare send/receive buffers */
3567   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3568   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3569   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3570   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3571   if (nis) {
3572     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3573   }
3574 
3575   /* Get data from local matrices */
3576   if (!isdense) {
3577     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3578     /* TODO: See below some guidelines on how to prepare the local buffers */
3579     /*
3580        send_buffer_vals should contain the raw values of the local matrix
3581        send_buffer_idxs should contain:
3582        - MatType_PRIVATE type
3583        - PetscInt        size_of_l2gmap
3584        - PetscInt        global_row_indices[size_of_l2gmap]
3585        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3586     */
3587   } else {
3588     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3589     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3590     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3591     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3592     send_buffer_idxs[1] = i;
3593     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3594     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3595     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3596     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3597     for (i=0;i<n_sends;i++) {
3598       ilengths_vals[is_indices[i]] = len*len;
3599       ilengths_idxs[is_indices[i]] = len+2;
3600     }
3601   }
3602   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3603   /* additional is (if any) */
3604   if (nis) {
3605     PetscMPIInt psum;
3606     PetscInt j;
3607     for (j=0,psum=0;j<nis;j++) {
3608       PetscInt plen;
3609       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3610       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3611       psum += len+1; /* indices + lenght */
3612     }
3613     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3614     for (j=0,psum=0;j<nis;j++) {
3615       PetscInt plen;
3616       const PetscInt *is_array_idxs;
3617       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3618       send_buffer_idxs_is[psum] = plen;
3619       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3620       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3621       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3622       psum += plen+1; /* indices + lenght */
3623     }
3624     for (i=0;i<n_sends;i++) {
3625       ilengths_idxs_is[is_indices[i]] = psum;
3626     }
3627     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3628   }
3629 
3630   buf_size_idxs = 0;
3631   buf_size_vals = 0;
3632   buf_size_idxs_is = 0;
3633   for (i=0;i<n_recvs;i++) {
3634     buf_size_idxs += (PetscInt)olengths_idxs[i];
3635     buf_size_vals += (PetscInt)olengths_vals[i];
3636     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3637   }
3638   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3639   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3640   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3641 
3642   /* get new tags for clean communications */
3643   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3644   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3645   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3646 
3647   /* allocate for requests */
3648   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3649   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3650   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3651   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3652   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3653   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3654 
3655   /* communications */
3656   ptr_idxs = recv_buffer_idxs;
3657   ptr_vals = recv_buffer_vals;
3658   ptr_idxs_is = recv_buffer_idxs_is;
3659   for (i=0;i<n_recvs;i++) {
3660     source_dest = onodes[i];
3661     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3662     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3663     ptr_idxs += olengths_idxs[i];
3664     ptr_vals += olengths_vals[i];
3665     if (nis) {
3666       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3667       ptr_idxs_is += olengths_idxs_is[i];
3668     }
3669   }
3670   for (i=0;i<n_sends;i++) {
3671     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3672     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3673     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3674     if (nis) {
3675       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3676     }
3677   }
3678   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3679   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3680 
3681   /* assemble new l2g map */
3682   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3683   ptr_idxs = recv_buffer_idxs;
3684   new_local_rows = 0;
3685   for (i=0;i<n_recvs;i++) {
3686     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3687     ptr_idxs += olengths_idxs[i];
3688   }
3689   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3690   ptr_idxs = recv_buffer_idxs;
3691   new_local_rows = 0;
3692   for (i=0;i<n_recvs;i++) {
3693     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3694     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3695     ptr_idxs += olengths_idxs[i];
3696   }
3697   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3698   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3699   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3700 
3701   /* infer new local matrix type from received local matrices type */
3702   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3703   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3704   if (n_recvs) {
3705     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3706     ptr_idxs = recv_buffer_idxs;
3707     for (i=0;i<n_recvs;i++) {
3708       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3709         new_local_type_private = MATAIJ_PRIVATE;
3710         break;
3711       }
3712       ptr_idxs += olengths_idxs[i];
3713     }
3714     switch (new_local_type_private) {
3715       case MATDENSE_PRIVATE:
3716         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3717           new_local_type = MATSEQAIJ;
3718           bs = 1;
3719         } else { /* if I receive only 1 dense matrix */
3720           new_local_type = MATSEQDENSE;
3721           bs = 1;
3722         }
3723         break;
3724       case MATAIJ_PRIVATE:
3725         new_local_type = MATSEQAIJ;
3726         bs = 1;
3727         break;
3728       case MATBAIJ_PRIVATE:
3729         new_local_type = MATSEQBAIJ;
3730         break;
3731       case MATSBAIJ_PRIVATE:
3732         new_local_type = MATSEQSBAIJ;
3733         break;
3734       default:
3735         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3736         break;
3737     }
3738   } else { /* by default, new_local_type is seqdense */
3739     new_local_type = MATSEQDENSE;
3740     bs = 1;
3741   }
3742 
3743   /* create MATIS object if needed */
3744   if (reuse == MAT_INITIAL_MATRIX) {
3745     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3746     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3747   } else {
3748     /* it also destroys the local matrices */
3749     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3750   }
3751   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3752   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3753 
3754   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3755 
3756   /* Global to local map of received indices */
3757   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3758   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3759   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3760 
3761   /* restore attributes -> type of incoming data and its size */
3762   buf_size_idxs = 0;
3763   for (i=0;i<n_recvs;i++) {
3764     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3765     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3766     buf_size_idxs += (PetscInt)olengths_idxs[i];
3767   }
3768   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3769 
3770   /* set preallocation */
3771   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3772   if (!newisdense) {
3773     PetscInt *new_local_nnz=0;
3774 
3775     ptr_vals = recv_buffer_vals;
3776     ptr_idxs = recv_buffer_idxs_local;
3777     if (n_recvs) {
3778       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3779     }
3780     for (i=0;i<n_recvs;i++) {
3781       PetscInt j;
3782       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3783         for (j=0;j<*(ptr_idxs+1);j++) {
3784           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3785         }
3786       } else {
3787         /* TODO */
3788       }
3789       ptr_idxs += olengths_idxs[i];
3790     }
3791     if (new_local_nnz) {
3792       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3793       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3794       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3795       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3796       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3797       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3798     } else {
3799       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3800     }
3801     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3802   } else {
3803     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3804   }
3805 
3806   /* set values */
3807   ptr_vals = recv_buffer_vals;
3808   ptr_idxs = recv_buffer_idxs_local;
3809   for (i=0;i<n_recvs;i++) {
3810     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3811       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3812       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3813       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3814       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3815       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3816     } else {
3817       /* TODO */
3818     }
3819     ptr_idxs += olengths_idxs[i];
3820     ptr_vals += olengths_vals[i];
3821   }
3822   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3823   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3824   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3825   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3826   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3827   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3828 
3829 #if 0
3830   if (!restrict_comm) { /* check */
3831     Vec       lvec,rvec;
3832     PetscReal infty_error;
3833 
3834     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3835     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3836     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3837     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3838     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3839     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3840     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3841     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3842     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3843   }
3844 #endif
3845 
3846   /* assemble new additional is (if any) */
3847   if (nis) {
3848     PetscInt **temp_idxs,*count_is,j,psum;
3849 
3850     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3851     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3852     ptr_idxs = recv_buffer_idxs_is;
3853     psum = 0;
3854     for (i=0;i<n_recvs;i++) {
3855       for (j=0;j<nis;j++) {
3856         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3857         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3858         psum += plen;
3859         ptr_idxs += plen+1; /* shift pointer to received data */
3860       }
3861     }
3862     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3863     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3864     for (i=1;i<nis;i++) {
3865       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3866     }
3867     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3868     ptr_idxs = recv_buffer_idxs_is;
3869     for (i=0;i<n_recvs;i++) {
3870       for (j=0;j<nis;j++) {
3871         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3872         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3873         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3874         ptr_idxs += plen+1; /* shift pointer to received data */
3875       }
3876     }
3877     for (i=0;i<nis;i++) {
3878       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3879       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3880       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3881     }
3882     ierr = PetscFree(count_is);CHKERRQ(ierr);
3883     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3884     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3885   }
3886   /* free workspace */
3887   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3888   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3889   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3890   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3891   if (isdense) {
3892     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3893     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3894   } else {
3895     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3896   }
3897   if (nis) {
3898     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3899     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3900   }
3901   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3902   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3903   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3904   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3905   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3906   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3907   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3908   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3909   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3910   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3911   ierr = PetscFree(onodes);CHKERRQ(ierr);
3912   if (nis) {
3913     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3914     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3915     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3916   }
3917   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3918   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3919     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3920     for (i=0;i<nis;i++) {
3921       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3922     }
3923   }
3924   PetscFunctionReturn(0);
3925 }
3926 
3927 /* temporary hack into ksp private data structure */
3928 #include <petsc/private/kspimpl.h>
3929 
3930 #undef __FUNCT__
3931 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3932 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3933 {
3934   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3935   PC_IS                  *pcis = (PC_IS*)pc->data;
3936   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3937   MatNullSpace           CoarseNullSpace=NULL;
3938   ISLocalToGlobalMapping coarse_islg;
3939   IS                     coarse_is,*isarray;
3940   PetscInt               i,im_active=-1,active_procs=-1;
3941   PetscInt               nis,nisdofs,nisneu;
3942   PC                     pc_temp;
3943   PCType                 coarse_pc_type;
3944   KSPType                coarse_ksp_type;
3945   PetscBool              multilevel_requested,multilevel_allowed;
3946   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3947   Mat                    t_coarse_mat_is;
3948   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3949   PetscMPIInt            all_procs;
3950   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3951   PetscBool              compute_vecs = PETSC_FALSE;
3952   PetscScalar            *array;
3953   PetscErrorCode         ierr;
3954 
3955   PetscFunctionBegin;
3956   /* Assign global numbering to coarse dofs */
3957   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3958     PetscInt ocoarse_size;
3959     compute_vecs = PETSC_TRUE;
3960     ocoarse_size = pcbddc->coarse_size;
3961     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3962     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3963     /* see if we can avoid some work */
3964     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3965       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3966         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3967         coarse_reuse = PETSC_FALSE;
3968       } else { /* we can safely reuse already computed coarse matrix */
3969         coarse_reuse = PETSC_TRUE;
3970       }
3971     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3972       coarse_reuse = PETSC_FALSE;
3973     }
3974     /* reset any subassembling information */
3975     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3976     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3977   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3978     coarse_reuse = PETSC_TRUE;
3979   }
3980 
3981   /* count "active" (i.e. with positive local size) and "void" processes */
3982   im_active = !!(pcis->n);
3983   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3984   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3985   void_procs = all_procs-active_procs;
3986   csin_type_simple = PETSC_TRUE;
3987   redist = PETSC_FALSE;
3988   if (pcbddc->current_level && void_procs) {
3989     csin_ml = PETSC_TRUE;
3990     ncoarse_ml = void_procs;
3991     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3992     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3993       csin_ds = PETSC_TRUE;
3994       ncoarse_ds = pcbddc->redistribute_coarse;
3995       redist = PETSC_TRUE;
3996     } else {
3997       csin_ds = PETSC_TRUE;
3998       ncoarse_ds = active_procs;
3999       redist = PETSC_TRUE;
4000     }
4001   } else {
4002     csin_ml = PETSC_FALSE;
4003     ncoarse_ml = all_procs;
4004     if (void_procs) {
4005       csin_ds = PETSC_TRUE;
4006       ncoarse_ds = void_procs;
4007       csin_type_simple = PETSC_FALSE;
4008     } else {
4009       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4010         csin_ds = PETSC_TRUE;
4011         ncoarse_ds = pcbddc->redistribute_coarse;
4012         redist = PETSC_TRUE;
4013       } else {
4014         csin_ds = PETSC_FALSE;
4015         ncoarse_ds = all_procs;
4016       }
4017     }
4018   }
4019 
4020   /*
4021     test if we can go multilevel: three conditions must be satisfied:
4022     - we have not exceeded the number of levels requested
4023     - we can actually subassemble the active processes
4024     - we can find a suitable number of MPI processes where we can place the subassembled problem
4025   */
4026   multilevel_allowed = PETSC_FALSE;
4027   multilevel_requested = PETSC_FALSE;
4028   if (pcbddc->current_level < pcbddc->max_levels) {
4029     multilevel_requested = PETSC_TRUE;
4030     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4031       multilevel_allowed = PETSC_FALSE;
4032     } else {
4033       multilevel_allowed = PETSC_TRUE;
4034     }
4035   }
4036   /* determine number of process partecipating to coarse solver */
4037   if (multilevel_allowed) {
4038     ncoarse = ncoarse_ml;
4039     csin = csin_ml;
4040     redist = PETSC_FALSE;
4041   } else {
4042     ncoarse = ncoarse_ds;
4043     csin = csin_ds;
4044   }
4045 
4046   /* creates temporary l2gmap and IS for coarse indexes */
4047   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4048   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4049 
4050   /* creates temporary MATIS object for coarse matrix */
4051   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4052   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4053   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4054   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4055 #if 0
4056   {
4057     PetscViewer viewer;
4058     char filename[256];
4059     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4060     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4061     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4062     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4063     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4064   }
4065 #endif
4066   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4067   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4068   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4069   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4070   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4071 
4072   /* compute dofs splitting and neumann boundaries for coarse dofs */
4073   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4074     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4075     const PetscInt         *idxs;
4076     ISLocalToGlobalMapping tmap;
4077 
4078     /* create map between primal indices (in local representative ordering) and local primal numbering */
4079     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4080     /* allocate space for temporary storage */
4081     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4082     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4083     /* allocate for IS array */
4084     nisdofs = pcbddc->n_ISForDofsLocal;
4085     nisneu = !!pcbddc->NeumannBoundariesLocal;
4086     nis = nisdofs + nisneu;
4087     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4088     /* dofs splitting */
4089     for (i=0;i<nisdofs;i++) {
4090       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4091       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4092       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4093       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4094       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4095       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4096       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4097       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4098     }
4099     /* neumann boundaries */
4100     if (pcbddc->NeumannBoundariesLocal) {
4101       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4102       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4103       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4104       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4105       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4106       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4107       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4108       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4109     }
4110     /* free memory */
4111     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4112     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4113     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4114   } else {
4115     nis = 0;
4116     nisdofs = 0;
4117     nisneu = 0;
4118     isarray = NULL;
4119   }
4120   /* destroy no longer needed map */
4121   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4122 
4123   /* restrict on coarse candidates (if needed) */
4124   coarse_mat_is = NULL;
4125   if (csin) {
4126     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4127       if (redist) {
4128         PetscMPIInt rank;
4129         PetscInt    spc,n_spc_p1,dest[1],destsize;
4130 
4131         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4132         spc = active_procs/ncoarse;
4133         n_spc_p1 = active_procs%ncoarse;
4134         if (im_active) {
4135           destsize = 1;
4136           if (rank > n_spc_p1*(spc+1)-1) {
4137             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4138           } else {
4139             dest[0] = rank/(spc+1);
4140           }
4141         } else {
4142           destsize = 0;
4143         }
4144         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4145       } else if (csin_type_simple) {
4146         PetscMPIInt rank;
4147         PetscInt    issize,isidx;
4148 
4149         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4150         if (im_active) {
4151           issize = 1;
4152           isidx = (PetscInt)rank;
4153         } else {
4154           issize = 0;
4155           isidx = -1;
4156         }
4157         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4158       } else { /* get a suitable subassembling pattern from MATIS code */
4159         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4160       }
4161 
4162       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4163       if (!redist || ncoarse <= void_procs) {
4164         PetscInt ncoarse_cand,tissize,*nisindices;
4165         PetscInt *coarse_candidates;
4166         const PetscInt* tisindices;
4167 
4168         /* get coarse candidates' ranks in pc communicator */
4169         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4170         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4171         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4172           if (!coarse_candidates[i]) {
4173             coarse_candidates[ncoarse_cand++]=i;
4174           }
4175         }
4176         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4177 
4178 
4179         if (pcbddc->dbg_flag) {
4180           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4181           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4182           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4183           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4184           for (i=0;i<ncoarse_cand;i++) {
4185             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4186           }
4187           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4188           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4189         }
4190         /* shift the pattern on coarse candidates */
4191         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4192         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4193         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4194         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4195         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4196         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4197         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4198       }
4199       if (pcbddc->dbg_flag) {
4200         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4201         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4202         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4203         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4204       }
4205     }
4206     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4207     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4208   } else {
4209     if (pcbddc->dbg_flag) {
4210       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4211       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4212       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4213     }
4214     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4215     coarse_mat_is = t_coarse_mat_is;
4216   }
4217 
4218   /* create local to global scatters for coarse problem */
4219   if (compute_vecs) {
4220     PetscInt lrows;
4221     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4222     if (coarse_mat_is) {
4223       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4224     } else {
4225       lrows = 0;
4226     }
4227     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4228     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4229     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4230     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4231     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4232   }
4233   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4234   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4235 
4236   /* set defaults for coarse KSP and PC */
4237   if (multilevel_allowed) {
4238     coarse_ksp_type = KSPRICHARDSON;
4239     coarse_pc_type = PCBDDC;
4240   } else {
4241     coarse_ksp_type = KSPPREONLY;
4242     coarse_pc_type = PCREDUNDANT;
4243   }
4244 
4245   /* print some info if requested */
4246   if (pcbddc->dbg_flag) {
4247     if (!multilevel_allowed) {
4248       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4249       if (multilevel_requested) {
4250         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4251       } else if (pcbddc->max_levels) {
4252         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4253       }
4254       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4255     }
4256   }
4257 
4258   /* create the coarse KSP object only once with defaults */
4259   if (coarse_mat_is) {
4260     MatReuse coarse_mat_reuse;
4261     PetscViewer dbg_viewer = NULL;
4262     if (pcbddc->dbg_flag) {
4263       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4264       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4265     }
4266     if (!pcbddc->coarse_ksp) {
4267       char prefix[256],str_level[16];
4268       size_t len;
4269       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4270       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4271       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4272       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4273       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4274       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4275       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4276       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4277       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4278       /* prefix */
4279       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4280       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4281       if (!pcbddc->current_level) {
4282         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4283         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4284       } else {
4285         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4286         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4287         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4288         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4289         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4290         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4291       }
4292       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4293       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4294       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4295       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4296       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4297       /* allow user customization */
4298       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4299     }
4300     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4301     if (nisdofs) {
4302       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4303       for (i=0;i<nisdofs;i++) {
4304         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4305       }
4306     }
4307     if (nisneu) {
4308       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4309       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4310     }
4311 
4312     /* get some info after set from options */
4313     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4314     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4315     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4316     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4317     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4318       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4319       isbddc = PETSC_FALSE;
4320     }
4321     if (isredundant) {
4322       KSP inner_ksp;
4323       PC inner_pc;
4324       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4325       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4326       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4327     }
4328 
4329     /* assemble coarse matrix */
4330     if (coarse_reuse) {
4331       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4332       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4333       coarse_mat_reuse = MAT_REUSE_MATRIX;
4334     } else {
4335       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4336     }
4337     if (isbddc || isnn) {
4338       if (pcbddc->coarsening_ratio > 1) {
4339         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4340           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4341           if (pcbddc->dbg_flag) {
4342             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4343             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4344             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4345             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4346           }
4347         }
4348         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4349       } else {
4350         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4351         coarse_mat = coarse_mat_is;
4352       }
4353     } else {
4354       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4355     }
4356     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4357 
4358     /* propagate symmetry info of coarse matrix */
4359     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4360     if (pc->pmat->symmetric_set) {
4361       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4362     }
4363     if (pc->pmat->hermitian_set) {
4364       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4365     }
4366     if (pc->pmat->spd_set) {
4367       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4368     }
4369     /* set operators */
4370     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4371     if (pcbddc->dbg_flag) {
4372       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4373     }
4374   } else { /* processes non partecipating to coarse solver (if any) */
4375     coarse_mat = 0;
4376   }
4377   ierr = PetscFree(isarray);CHKERRQ(ierr);
4378 #if 0
4379   {
4380     PetscViewer viewer;
4381     char filename[256];
4382     sprintf(filename,"coarse_mat.m");
4383     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4384     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4385     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4386     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4387   }
4388 #endif
4389 
4390   /* Compute coarse null space (special handling by BDDC only) */
4391   if (pcbddc->NullSpace) {
4392     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4393   }
4394 
4395   if (pcbddc->coarse_ksp) {
4396     Vec crhs,csol;
4397     PetscBool ispreonly;
4398     if (CoarseNullSpace) {
4399       if (isbddc) {
4400         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4401       } else {
4402         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4403       }
4404     }
4405     /* setup coarse ksp */
4406     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4407     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4408     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4409     /* hack */
4410     if (!csol) {
4411       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4412     }
4413     if (!crhs) {
4414       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4415     }
4416     /* Check coarse problem if in debug mode or if solving with an iterative method */
4417     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4418     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4419       KSP       check_ksp;
4420       KSPType   check_ksp_type;
4421       PC        check_pc;
4422       Vec       check_vec,coarse_vec;
4423       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4424       PetscInt  its;
4425       PetscBool compute_eigs;
4426       PetscReal *eigs_r,*eigs_c;
4427       PetscInt  neigs;
4428       const char *prefix;
4429 
4430       /* Create ksp object suitable for estimation of extreme eigenvalues */
4431       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4432       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4433       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4434       if (ispreonly) {
4435         check_ksp_type = KSPPREONLY;
4436         compute_eigs = PETSC_FALSE;
4437       } else {
4438         check_ksp_type = KSPGMRES;
4439         compute_eigs = PETSC_TRUE;
4440       }
4441       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4442       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4443       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4444       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4445       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4446       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4447       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4448       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4449       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4450       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4451       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4452       /* create random vec */
4453       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4454       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4455       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4456       if (CoarseNullSpace) {
4457         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4458       }
4459       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4460       /* solve coarse problem */
4461       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4462       if (CoarseNullSpace) {
4463         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4464       }
4465       /* set eigenvalue estimation if preonly has not been requested */
4466       if (compute_eigs) {
4467         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4468         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4469         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4470         lambda_max = eigs_r[neigs-1];
4471         lambda_min = eigs_r[0];
4472         if (pcbddc->use_coarse_estimates) {
4473           if (lambda_max>lambda_min) {
4474             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4475             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4476           }
4477         }
4478       }
4479 
4480       /* check coarse problem residual error */
4481       if (pcbddc->dbg_flag) {
4482         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4483         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4484         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4485         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4486         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4487         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4488         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4489         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4490         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4491         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4492         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4493         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4494         if (compute_eigs) {
4495           PetscReal lambda_max_s,lambda_min_s;
4496           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4497           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4498           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4499           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4500           for (i=0;i<neigs;i++) {
4501             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4502           }
4503         }
4504         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4505         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4506       }
4507       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4508       if (compute_eigs) {
4509         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4510         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4511       }
4512     }
4513   }
4514   /* print additional info */
4515   if (pcbddc->dbg_flag) {
4516     /* waits until all processes reaches this point */
4517     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4518     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4519     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4520   }
4521 
4522   /* free memory */
4523   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4524   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4525   PetscFunctionReturn(0);
4526 }
4527 
4528 #undef __FUNCT__
4529 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4530 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4531 {
4532   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4533   PC_IS*         pcis = (PC_IS*)pc->data;
4534   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4535   PetscInt       i,local_size,coarse_size=0;
4536   PetscInt       *local_primal_indices=NULL;
4537   PetscInt       *t_local_primal_indices=NULL;
4538   PetscErrorCode ierr;
4539 
4540   PetscFunctionBegin;
4541   /* Compute global number of coarse dofs */
4542   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4543     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4544   }
4545   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr);
4546   ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr);
4547   local_size = 0;
4548   for (i=0;i<pcbddc->local_primal_size_cc;i++) {
4549     PetscInt j;
4550     for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j;
4551   }
4552   ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr);
4553   if (local_size != pcbddc->local_primal_size) {
4554     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4555   }
4556 
4557   /* check numbering */
4558   if (pcbddc->dbg_flag) {
4559     PetscScalar coarsesum,*array;
4560     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4561 
4562     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4563     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4564     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4565     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4566     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4567     for (i=0;i<pcbddc->local_primal_size;i++) {
4568       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4569     }
4570     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4571     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4572     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4573     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4574     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4575     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4576     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4577     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4578     for (i=0;i<pcis->n;i++) {
4579       if (array[i] == 1.0) {
4580         set_error = PETSC_TRUE;
4581         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4582       }
4583     }
4584     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4585     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4586     for (i=0;i<pcis->n;i++) {
4587       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4588     }
4589     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4590     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4591     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4592     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4593     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4594     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4595     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4596       PetscInt *gidxs;
4597 
4598       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4599       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4600       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4601       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4602       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4603       for (i=0;i<pcbddc->local_primal_size;i++) {
4604         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4605       }
4606       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4607       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4608     }
4609     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4610     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4611   }
4612   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4613   /* get back data */
4614   *coarse_size_n = coarse_size;
4615   *local_primal_indices_n = local_primal_indices;
4616   PetscFunctionReturn(0);
4617 }
4618 
4619 #undef __FUNCT__
4620 #define __FUNCT__ "PCBDDCGlobalToLocal"
4621 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4622 {
4623   IS             localis_t;
4624   PetscInt       i,lsize,*idxs,n;
4625   PetscScalar    *vals;
4626   PetscErrorCode ierr;
4627 
4628   PetscFunctionBegin;
4629   /* get indices in local ordering exploiting local to global map */
4630   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4631   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4632   for (i=0;i<lsize;i++) vals[i] = 1.0;
4633   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4634   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4635   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4636   if (idxs) { /* multilevel guard */
4637     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4638   }
4639   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4640   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4641   ierr = PetscFree(vals);CHKERRQ(ierr);
4642   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4643   /* now compute set in local ordering */
4644   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4645   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4646   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4647   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4648   for (i=0,lsize=0;i<n;i++) {
4649     if (PetscRealPart(vals[i]) > 0.5) {
4650       lsize++;
4651     }
4652   }
4653   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4654   for (i=0,lsize=0;i<n;i++) {
4655     if (PetscRealPart(vals[i]) > 0.5) {
4656       idxs[lsize++] = i;
4657     }
4658   }
4659   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4660   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4661   *localis = localis_t;
4662   PetscFunctionReturn(0);
4663 }
4664 
4665 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4666 #undef __FUNCT__
4667 #define __FUNCT__ "PCBDDCMatMult_Private"
4668 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4669 {
4670   PCBDDCChange_ctx change_ctx;
4671   PetscErrorCode   ierr;
4672 
4673   PetscFunctionBegin;
4674   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4675   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4676   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4677   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4678   PetscFunctionReturn(0);
4679 }
4680 
4681 #undef __FUNCT__
4682 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4683 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4684 {
4685   PCBDDCChange_ctx change_ctx;
4686   PetscErrorCode   ierr;
4687 
4688   PetscFunctionBegin;
4689   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4690   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4691   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4692   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4693   PetscFunctionReturn(0);
4694 }
4695 
4696 #undef __FUNCT__
4697 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4698 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4699 {
4700   PC_IS               *pcis=(PC_IS*)pc->data;
4701   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4702   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4703   Mat                 S_j;
4704   PetscInt            *used_xadj,*used_adjncy;
4705   PetscBool           free_used_adj;
4706   PetscErrorCode      ierr;
4707 
4708   PetscFunctionBegin;
4709   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4710   free_used_adj = PETSC_FALSE;
4711   if (pcbddc->sub_schurs_layers == -1) {
4712     used_xadj = NULL;
4713     used_adjncy = NULL;
4714   } else {
4715     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4716       used_xadj = pcbddc->mat_graph->xadj;
4717       used_adjncy = pcbddc->mat_graph->adjncy;
4718     } else if (pcbddc->computed_rowadj) {
4719       used_xadj = pcbddc->mat_graph->xadj;
4720       used_adjncy = pcbddc->mat_graph->adjncy;
4721     } else {
4722       PetscBool      flg_row=PETSC_FALSE;
4723       const PetscInt *xadj,*adjncy;
4724       PetscInt       nvtxs;
4725 
4726       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4727       if (flg_row) {
4728         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4729         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4730         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4731         free_used_adj = PETSC_TRUE;
4732       } else {
4733         pcbddc->sub_schurs_layers = -1;
4734         used_xadj = NULL;
4735         used_adjncy = NULL;
4736       }
4737       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4738     }
4739   }
4740 
4741   /* setup sub_schurs data */
4742   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4743   if (!sub_schurs->use_mumps) {
4744     /* pcbddc->ksp_D up to date only if not using MUMPS */
4745     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4746     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4747   } else {
4748     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4749     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4750   }
4751   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4752 
4753   /* free adjacency */
4754   if (free_used_adj) {
4755     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4756   }
4757   PetscFunctionReturn(0);
4758 }
4759 
4760 #undef __FUNCT__
4761 #define __FUNCT__ "PCBDDCInitSubSchurs"
4762 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4763 {
4764   PC_IS               *pcis=(PC_IS*)pc->data;
4765   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4766   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4767   PCBDDCGraph         graph;
4768   PetscErrorCode      ierr;
4769 
4770   PetscFunctionBegin;
4771   /* attach interface graph for determining subsets */
4772   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4773     IS       verticesIS,verticescomm;
4774     PetscInt vsize,*idxs;
4775 
4776     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4777     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
4778     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4779     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
4780     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4781     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4782     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4783     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4784     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
4785     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
4786     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4787 /*
4788     if (pcbddc->dbg_flag) {
4789       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4790     }
4791 */
4792   } else {
4793     graph = pcbddc->mat_graph;
4794   }
4795 
4796   /* sub_schurs init */
4797   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4798 
4799   /* free graph struct */
4800   if (pcbddc->sub_schurs_rebuild) {
4801     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4802   }
4803   PetscFunctionReturn(0);
4804 }
4805