xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 6366ad86377dce0c10aaab75c941416cd73a9aed)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size;
76       cum2 += subset_size*n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
112     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125                       nv+cum,&pcbddc->adaptive_constraints_idxs,
126                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
127   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129   maxneigs = 0;
130   cum = cum2 = cumarray = 0;
131   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132   pcbddc->adaptive_constraints_data_ptr[0] = 0;
133   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
134     const PetscInt *idxs;
135 
136     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
137     for (cum=0;cum<nv;cum++) {
138       pcbddc->adaptive_constraints_n[cum] = 1;
139       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140       pcbddc->adaptive_constraints_data[cum] = 1.0;
141       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143     }
144     cum2 = cum;
145     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
146   }
147 
148   if (mss) { /* multilevel */
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
150     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
151   }
152 
153   for (i=0;i<sub_schurs->n_subs;i++) {
154     PetscInt j,subset_size;
155 
156     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
157     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
158       const PetscInt *idxs;
159       PetscReal      infty = PETSC_MAX_REAL;
160       PetscInt       eigs_start = 0;
161       PetscBLASInt   B_N;
162 
163       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
164       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
165         if (sub_schurs->is_hermitian) {
166           PetscInt j;
167           for (j=0;j<subset_size;j++) {
168             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
169           }
170           for (j=0;j<subset_size;j++) {
171             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
172           }
173         } else {
174           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
175           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
176         }
177       } else {
178         S = Sarray + cumarray;
179         St = Starray + cumarray;
180       }
181 
182       /* Threshold: this is an heuristic for edges */
183       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
184       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
185 
186       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
187         PetscBLASInt B_itype = 1;
188         PetscBLASInt B_IL, B_IU;
189         PetscReal    eps = -1.0; /* dlamch? */
190         PetscInt     nmin_s;
191 
192         /* ask for eigenvalues larger than thresh */
193         if (pcbddc->dbg_flag) {
194           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
195         }
196         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
197 #if defined(PETSC_USE_COMPLEX)
198         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
199 #else
200         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
201 #endif
202         ierr = PetscFPTrapPop();CHKERRQ(ierr);
203         if (B_ierr) {
204           if (B_ierr < 0 ) {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
206           } else if (B_ierr <= B_N) {
207             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
208           } else {
209             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
210           }
211         }
212 
213         if (B_neigs > nmax) {
214           if (pcbddc->dbg_flag) {
215             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
216           }
217           eigs_start = B_neigs -nmax;
218           B_neigs = nmax;
219         }
220 
221         nmin_s = PetscMin(nmin,B_N);
222         if (B_neigs < nmin_s) {
223           PetscBLASInt B_neigs2;
224 
225           B_IU = B_N - B_neigs;
226           B_IL = B_N - nmin_s + 1;
227           if (pcbddc->dbg_flag) {
228             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
229           }
230           if (sub_schurs->is_hermitian) {
231             PetscInt j;
232             for (j=0;j<subset_size;j++) {
233               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
234             }
235             for (j=0;j<subset_size;j++) {
236               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
237             }
238           } else {
239             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
240             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
241           }
242           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
243 #if defined(PETSC_USE_COMPLEX)
244           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
245 #else
246           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
247 #endif
248           ierr = PetscFPTrapPop();CHKERRQ(ierr);
249           B_neigs += B_neigs2;
250         }
251         if (B_ierr) {
252           if (B_ierr < 0 ) {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
254           } else if (B_ierr <= B_N) {
255             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
256           } else {
257             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
258           }
259         }
260         if (pcbddc->dbg_flag) {
261           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
262           for (j=0;j<B_neigs;j++) {
263             if (eigs[j] == 0.0) {
264               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
265             } else {
266               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
267             }
268           }
269         }
270       } else {
271           /* TODO */
272       }
273 
274       maxneigs = PetscMax(B_neigs,maxneigs);
275       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
276       if (B_neigs) {
277         ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
278 
279         if (pcbddc->dbg_flag > 1) {
280           PetscInt ii;
281           for (ii=0;ii<B_neigs;ii++) {
282             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
283             for (j=0;j<B_N;j++) {
284               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
285             }
286           }
287         }
288 #if 0
289         for (j=0;j<B_neigs;j++) {
290           PetscBLASInt Blas_N,Blas_one = 1.0;
291           PetscScalar norm;
292           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
293           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
294                                                      &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
295           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
296             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
297           } else {
298             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
299           }
300           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
301         }
302 #endif
303         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
304         pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
305         pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
306         cum++;
307       }
308       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
309     }
310     /* shift for next computation */
311     cumarray += subset_size*subset_size;
312   }
313   if (pcbddc->dbg_flag) {
314     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
315   }
316 
317   if (mss) {
318     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
319     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
320     /* destroy matrices (junk) */
321     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
322     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
323   }
324   if (allocated_S_St) {
325     ierr = PetscFree2(S,St);CHKERRQ(ierr);
326   }
327   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
328 #if defined(PETSC_USE_COMPLEX)
329   ierr = PetscFree(rwork);CHKERRQ(ierr);
330 #endif
331   if (pcbddc->dbg_flag) {
332     PetscInt maxneigs_r;
333     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
334     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
335   }
336   PetscFunctionReturn(0);
337 }
338 
339 #undef __FUNCT__
340 #define __FUNCT__ "PCBDDCSetUpSolvers"
341 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
342 {
343   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
344   PetscScalar    *coarse_submat_vals;
345   PetscErrorCode ierr;
346 
347   PetscFunctionBegin;
348   /* Setup local scatters R_to_B and (optionally) R_to_D */
349   /* PCBDDCSetUpLocalWorkVectors should be called first! */
350   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
351 
352   /* Setup local neumann solver ksp_R */
353   /* PCBDDCSetUpLocalScatters should be called first! */
354   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
355 
356   /* Change global null space passed in by the user if change of basis has been requested */
357   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
358     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
359   }
360 
361   /*
362      Setup local correction and local part of coarse basis.
363      Gives back the dense local part of the coarse matrix in column major ordering
364   */
365   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
366 
367   /* Compute total number of coarse nodes and setup coarse solver */
368   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
369 
370   /* free */
371   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "PCBDDCResetCustomization"
377 PetscErrorCode PCBDDCResetCustomization(PC pc)
378 {
379   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
384   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
385   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
387   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
388   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
389   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
390   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
391   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
392   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
393   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 #undef __FUNCT__
398 #define __FUNCT__ "PCBDDCResetTopography"
399 PetscErrorCode PCBDDCResetTopography(PC pc)
400 {
401   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
402   PetscErrorCode ierr;
403 
404   PetscFunctionBegin;
405   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
406   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
407   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
408   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
409   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "PCBDDCResetSolvers"
415 PetscErrorCode PCBDDCResetSolvers(PC pc)
416 {
417   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
418   PetscScalar    *array;
419   PetscErrorCode ierr;
420 
421   PetscFunctionBegin;
422   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
423   if (pcbddc->coarse_phi_B) {
424     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
425     ierr = PetscFree(array);CHKERRQ(ierr);
426   }
427   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
428   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
429   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
430   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
431   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
432   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
434   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
436   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
437   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
440   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
443   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
444   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
446   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
447   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
448   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
449   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
455 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
456 {
457   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
458   PC_IS          *pcis = (PC_IS*)pc->data;
459   VecType        impVecType;
460   PetscInt       n_constraints,n_R,old_size;
461   PetscErrorCode ierr;
462 
463   PetscFunctionBegin;
464   if (!pcbddc->ConstraintMatrix) {
465     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
466   }
467   /* get sizes */
468   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
469   n_R = pcis->n-pcbddc->n_vertices;
470   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
471   /* local work vectors (try to avoid unneeded work)*/
472   /* R nodes */
473   old_size = -1;
474   if (pcbddc->vec1_R) {
475     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
476   }
477   if (n_R != old_size) {
478     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
480     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
481     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
482     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
483     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
484   }
485   /* local primal dofs */
486   old_size = -1;
487   if (pcbddc->vec1_P) {
488     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
489   }
490   if (pcbddc->local_primal_size != old_size) {
491     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
492     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
493     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
494     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
495   }
496   /* local explicit constraints */
497   old_size = -1;
498   if (pcbddc->vec1_C) {
499     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
500   }
501   if (n_constraints && n_constraints != old_size) {
502     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
503     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
504     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
505     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
506   }
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "PCBDDCSetUpCorrection"
512 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
513 {
514   PetscErrorCode  ierr;
515   /* pointers to pcis and pcbddc */
516   PC_IS*          pcis = (PC_IS*)pc->data;
517   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
518   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
519   /* submatrices of local problem */
520   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
521   /* submatrices of local coarse problem */
522   Mat             S_VV,S_CV,S_VC,S_CC;
523   /* working matrices */
524   Mat             C_CR;
525   /* additional working stuff */
526   PC              pc_R;
527   Mat             F;
528   PetscBool       isLU,isCHOL,isILU;
529 
530   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
531   PetscScalar     *work;
532   PetscInt        *idx_V_B;
533   PetscInt        n,n_vertices,n_constraints;
534   PetscInt        i,n_R,n_D,n_B;
535   PetscBool       unsymmetric_check;
536   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
537   MatType         impMatType;
538   /* some shortcuts to scalars */
539   PetscScalar     one=1.0,m_one=-1.0;
540 
541   PetscFunctionBegin;
542   n_vertices = pcbddc->n_vertices;
543   n_constraints = pcbddc->local_primal_size-n_vertices;
544   /* Set Non-overlapping dimensions */
545   n_B = pcis->n_B;
546   n_D = pcis->n - n_B;
547   n_R = pcis->n - n_vertices;
548 
549   /* Set types for local objects needed by BDDC precondtioner */
550   impMatType = MATSEQDENSE;
551 
552   /* vertices in boundary numbering */
553   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
554   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
555   if (i != n_vertices) {
556     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
557   }
558 
559   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
560   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
567   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
568   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
569 
570   unsymmetric_check = PETSC_FALSE;
571   /* allocate workspace */
572   n = 0;
573   if (n_constraints) {
574     n += n_R*n_constraints;
575   }
576   if (n_vertices) {
577     n = PetscMax(2*n_R*n_vertices,n);
578     n = PetscMax((n_R+n_B)*n_vertices,n);
579   }
580   if (!pcbddc->issym) {
581     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
582     unsymmetric_check = PETSC_TRUE;
583   }
584   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
585 
586   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
587   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
588   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
589   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
590   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
591   if (isLU || isILU || isCHOL) {
592     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
593   } else if (sub_schurs->reuse_mumps) {
594     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
595     MatFactorType type;
596 
597     F = reuse_mumps->F;
598     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
599     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
600   } else {
601     F = NULL;
602   }
603 
604   /* Precompute stuffs needed for preprocessing and application of BDDC*/
605   if (n_constraints) {
606     Mat         M1,M2,M3;
607     Mat         auxmat;
608     IS          is_aux;
609     PetscScalar *array,*array2;
610 
611     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
612     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
613 
614     /* Extract constraints on R nodes: C_{CR}  */
615     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
616     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
617     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
618 
619     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
620     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
621     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
622     for (i=0;i<n_constraints;i++) {
623       const PetscScalar *row_cmat_values;
624       const PetscInt    *row_cmat_indices;
625       PetscInt          size_of_constraint,j;
626 
627       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
628       for (j=0;j<size_of_constraint;j++) {
629         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
630       }
631       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
632     }
633     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
634     if (F) {
635       Mat B;
636 
637       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
638       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
639       ierr = MatDestroy(&B);CHKERRQ(ierr);
640     } else {
641       PetscScalar *marr;
642 
643       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
644       for (i=0;i<n_constraints;i++) {
645         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
646         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
647         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
648         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
649         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
650       }
651       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
652     }
653     if (!pcbddc->switch_static) {
654       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
655       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
656       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
657       for (i=0;i<n_constraints;i++) {
658         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
659         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
660         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
661         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
662         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
663         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
664       }
665       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
666       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
667       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
668     } else {
669       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
670       pcbddc->local_auxmat2 = local_auxmat2_R;
671       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
672     }
673     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
674     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
675     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
676     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
677     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
678     if (isCHOL) {
679       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
680     } else {
681       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
682     }
683     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
684     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
685     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
686     ierr = MatDestroy(&M2);CHKERRQ(ierr);
687     ierr = MatDestroy(&M3);CHKERRQ(ierr);
688     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
689     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
690     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
691     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
692     ierr = MatDestroy(&M1);CHKERRQ(ierr);
693   }
694   /* Get submatrices from subdomain matrix */
695   if (n_vertices) {
696     Mat       newmat;
697     IS        is_aux;
698     PetscInt  ibs,mbs;
699     PetscBool issbaij;
700 
701     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
702       IS tis;
703 
704       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
705       ierr = ISSort(tis);CHKERRQ(ierr);
706       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
707       ierr = ISDestroy(&tis);CHKERRQ(ierr);
708     } else {
709       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
710     }
711     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
712     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
713     if (ibs != mbs) { /* need to convert to SEQAIJ */
714       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
715       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
716       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
717       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
718       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
719       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
720     } else {
721       /* this is safe */
722       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
723       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
724       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
725         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
726         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
727         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
728         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
729         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
730         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
731       } else {
732         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
733         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
734         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
735       }
736       /* need to convert to SeqAIJ for MatMatMult with dense matrix */
737       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
738     }
739     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
740   }
741 
742   /* Matrix of coarse basis functions (local) */
743   if (pcbddc->coarse_phi_B) {
744     PetscInt on_B,on_primal,on_D=n_D;
745     if (pcbddc->coarse_phi_D) {
746       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
747     }
748     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
749     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
750       PetscScalar *marray;
751 
752       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
753       ierr = PetscFree(marray);CHKERRQ(ierr);
754       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
755       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
756       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
757       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
758     }
759   }
760 
761   if (!pcbddc->coarse_phi_B) {
762     PetscScalar *marray;
763 
764     n = n_B*pcbddc->local_primal_size;
765     if (pcbddc->switch_static || pcbddc->dbg_flag) {
766       n += n_D*pcbddc->local_primal_size;
767     }
768     if (!pcbddc->issym) {
769       n *= 2;
770     }
771     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
772     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
773     n = n_B*pcbddc->local_primal_size;
774     if (pcbddc->switch_static || pcbddc->dbg_flag) {
775       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
776       n += n_D*pcbddc->local_primal_size;
777     }
778     if (!pcbddc->issym) {
779       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
780       if (pcbddc->switch_static || pcbddc->dbg_flag) {
781         n = n_B*pcbddc->local_primal_size;
782         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
783       }
784     } else {
785       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
786       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
787       if (pcbddc->switch_static || pcbddc->dbg_flag) {
788         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
789         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
790       }
791     }
792   }
793   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
794   /* vertices */
795   if (n_vertices) {
796 
797     if (n_R) {
798       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
799       PetscBLASInt B_N,B_one = 1;
800       PetscScalar  *x,*y;
801 
802       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
803       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
804       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
805       if (F) { /* TODO could be optimized for symmetric problems */
806         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
807       } else {
808         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
809         for (i=0;i<n_vertices;i++) {
810           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
811           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
812           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
813           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
814           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
815         }
816         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
817       }
818       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
819       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
820       if (n_constraints) {
821         Mat B;
822 
823         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
824         for (i=0;i<n_vertices;i++) {
825           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
826           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
827           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
830           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
831         }
832         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
833         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
834         ierr = MatDestroy(&B);CHKERRQ(ierr);
835         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
836         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
837         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
838         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
839         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
840         ierr = MatDestroy(&B);CHKERRQ(ierr);
841       }
842       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
843       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
844       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
845       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
846       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
847       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
848       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
849       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
850       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
851       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
852     } else {
853       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
854     }
855     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
856     /* coarse basis functions */
857     for (i=0;i<n_vertices;i++) {
858       PetscScalar *y;
859 
860       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
861       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
862       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
863       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
864       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
865       y[n_B*i+idx_V_B[i]] = 1.0;
866       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
867       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
868 
869       if (pcbddc->switch_static || pcbddc->dbg_flag) {
870         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
871         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
872         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
875         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
876       }
877       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
878     }
879   }
880 
881   if (n_constraints) {
882     Mat B;
883 
884     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
885     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
886     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
887     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
888     if (n_vertices) {
889       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
890         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
891       } else {
892         Mat S_VCt;
893 
894         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
895         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
896         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
897       }
898     }
899     ierr = MatDestroy(&B);CHKERRQ(ierr);
900     /* coarse basis functions */
901     for (i=0;i<n_constraints;i++) {
902       PetscScalar *y;
903 
904       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
905       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
906       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
907       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
908       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
910       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
911       if (pcbddc->switch_static || pcbddc->dbg_flag) {
912         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
913         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
914         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
917         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
918       }
919       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
920     }
921   }
922   if (n_constraints) {
923     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
924   }
925 
926   /* compute other basis functions for non-symmetric problems */
927   if (!pcbddc->issym) {
928 
929     if (n_constraints) {
930       Mat S_CCT,B_C;
931 
932       /* this is a lazy thing */
933       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
934       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
935       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
936       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
937       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
938       if (n_vertices) {
939         Mat B_V,S_VCT;
940 
941         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
942         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
943         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
944         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
945         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
946       }
947       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
948     } else { /* if there are no constraints, reset work */
949       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
950     }
951     if (n_vertices && n_R) {
952       Mat          A_VRT;
953       PetscScalar  *marray;
954       PetscBLASInt B_N,B_one = 1;
955 
956       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
957       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
958       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
959       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
960       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
961       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
962       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
963     }
964 
965     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
966       for (i=0;i<pcbddc->local_primal_size;i++) {
967         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
968         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
969         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
970         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
971         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
972       }
973     } else {
974       for (i=0;i<pcbddc->local_primal_size;i++) {
975         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
976         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
977         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
978         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
979         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
980       }
981     }
982     /* coarse basis functions */
983     for (i=0;i<pcbddc->local_primal_size;i++) {
984       PetscScalar *y;
985 
986       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
987       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
988       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
989       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991       if (i<n_vertices) {
992         y[n_B*i+idx_V_B[i]] = 1.0;
993       }
994       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
995       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
996 
997       if (pcbddc->switch_static || pcbddc->dbg_flag) {
998         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
999         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1000         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1003         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1004       }
1005       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1006     }
1007   }
1008   /* free memory */
1009   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1010   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1011   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1012   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1013   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1014   ierr = PetscFree(work);CHKERRQ(ierr);
1015   if (n_vertices) {
1016     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1017   }
1018   if (n_constraints) {
1019     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1020   }
1021   /* Checking coarse_sub_mat and coarse basis functios */
1022   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1023   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1024   if (pcbddc->dbg_flag) {
1025     Mat         coarse_sub_mat;
1026     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1027     Mat         coarse_phi_D,coarse_phi_B;
1028     Mat         coarse_psi_D,coarse_psi_B;
1029     Mat         A_II,A_BB,A_IB,A_BI;
1030     Mat         C_B,CPHI;
1031     IS          is_dummy;
1032     Vec         mones;
1033     MatType     checkmattype=MATSEQAIJ;
1034     PetscReal   real_value;
1035 
1036     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1037     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1038     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1039     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1040     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1041     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1042     if (unsymmetric_check) {
1043       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1044       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1045     }
1046     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1047 
1048     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1049     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
1050     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1051     if (unsymmetric_check) {
1052       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1053       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1054       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1055       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1056       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1057       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1058       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1059       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1060       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1061       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1062       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1063       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1064     } else {
1065       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1066       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1067       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1068       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1069       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1070       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1071       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1072       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1073     }
1074     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1075     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1076     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1077     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1078     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1079     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1080     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1081     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1082 
1083     /* check constraints */
1084     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1085     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1086     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1087     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1088     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1089     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1090     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1091     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1092     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1093     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1094     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1095     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1096 
1097     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1098     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1099     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1100     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1101     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1102     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1103     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1104     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1105     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1106     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1107     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1108     if (unsymmetric_check) {
1109       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1110       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1111     }
1112     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1113   }
1114   /* get back data */
1115   *coarse_submat_vals_n = coarse_submat_vals;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1121 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1122 {
1123   Mat            *work_mat;
1124   IS             isrow_s,iscol_s;
1125   PetscBool      rsorted,csorted;
1126   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1131   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1132   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1133   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1134 
1135   if (!rsorted) {
1136     const PetscInt *idxs;
1137     PetscInt *idxs_sorted,i;
1138 
1139     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1140     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1141     for (i=0;i<rsize;i++) {
1142       idxs_perm_r[i] = i;
1143     }
1144     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1145     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1146     for (i=0;i<rsize;i++) {
1147       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1148     }
1149     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1150     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1151   } else {
1152     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1153     isrow_s = isrow;
1154   }
1155 
1156   if (!csorted) {
1157     if (isrow == iscol) {
1158       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1159       iscol_s = isrow_s;
1160     } else {
1161       const PetscInt *idxs;
1162       PetscInt *idxs_sorted,i;
1163 
1164       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1165       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1166       for (i=0;i<csize;i++) {
1167         idxs_perm_c[i] = i;
1168       }
1169       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1170       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1171       for (i=0;i<csize;i++) {
1172         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1173       }
1174       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1175       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1176     }
1177   } else {
1178     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1179     iscol_s = iscol;
1180   }
1181 
1182   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1183 
1184   if (!rsorted || !csorted) {
1185     Mat      new_mat;
1186     IS       is_perm_r,is_perm_c;
1187 
1188     if (!rsorted) {
1189       PetscInt *idxs_r,i;
1190       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1191       for (i=0;i<rsize;i++) {
1192         idxs_r[idxs_perm_r[i]] = i;
1193       }
1194       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1195       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1196     } else {
1197       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1198     }
1199     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1200 
1201     if (!csorted) {
1202       if (isrow_s == iscol_s) {
1203         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1204         is_perm_c = is_perm_r;
1205       } else {
1206         PetscInt *idxs_c,i;
1207         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1208         for (i=0;i<csize;i++) {
1209           idxs_c[idxs_perm_c[i]] = i;
1210         }
1211         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1212         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1213       }
1214     } else {
1215       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1216     }
1217     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1218 
1219     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1220     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1221     work_mat[0] = new_mat;
1222     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1223     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1224   }
1225 
1226   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1227   *B = work_mat[0];
1228   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1229   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1230   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1236 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1237 {
1238   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1239   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1240   Mat            new_mat;
1241   IS             is_local,is_global;
1242   PetscInt       local_size;
1243   PetscBool      isseqaij;
1244   PetscErrorCode ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1248   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1249   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1250   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1251   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1252   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1253   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1254 
1255   /* check */
1256   if (pcbddc->dbg_flag) {
1257     Vec       x,x_change;
1258     PetscReal error;
1259 
1260     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1261     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1262     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1263     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1266     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1267     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1268     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1269     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1270     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1271     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1272     ierr = VecDestroy(&x);CHKERRQ(ierr);
1273     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1274   }
1275 
1276   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1277   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1278   if (isseqaij) {
1279     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1280   } else {
1281     Mat work_mat;
1282     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1283     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1284     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1285   }
1286   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1287 #if !defined(PETSC_USE_COMPLEX)
1288   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1289 #endif
1290   /*
1291   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1292   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1293   */
1294   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #undef __FUNCT__
1299 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1300 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1301 {
1302   PC_IS*          pcis = (PC_IS*)(pc->data);
1303   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1304   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1305   IS              is_B_reuse_mumps=NULL;
1306   PetscInt        *idx_R_local;
1307   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1308   PetscInt        vbs,bs;
1309   PetscBT         bitmask=NULL;
1310   PetscErrorCode  ierr;
1311 
1312   PetscFunctionBegin;
1313   /*
1314     No need to setup local scatters if
1315       - primal space is unchanged
1316         AND
1317       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1318         AND
1319       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1320   */
1321   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1322     PetscFunctionReturn(0);
1323   }
1324   /* destroy old objects */
1325   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1326   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1327   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1328   /* Set Non-overlapping dimensions */
1329   n_B = pcis->n_B;
1330   n_D = pcis->n - n_B;
1331   n_vertices = pcbddc->n_vertices;
1332 
1333   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1334   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1335 
1336   /* create auxiliary bitmask */
1337   if (!sub_schurs->reuse_mumps) {
1338     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1339     for (i=0;i<n_vertices;i++) {
1340       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1341     }
1342 
1343     for (i=0, n_R=0; i<pcis->n; i++) {
1344       if (!PetscBTLookup(bitmask,i)) {
1345         idx_R_local[n_R++] = i;
1346       }
1347     }
1348   } else { /* A different ordering and auxiliary scatters are needed we are reusing MUMPS Schur solver */
1349     PCBDDCReuseMumps       reuse_mumps = sub_schurs->reuse_mumps;
1350     const PetscInt         *idxs;
1351     PetscInt               schur_size;
1352     IS                     list[2],tisB_N,tisR;
1353 
1354     if (sub_schurs->is_dir) {
1355       list[0] = sub_schurs->is_Ej_all;
1356       list[1] = sub_schurs->is_dir;
1357       ierr = ISGlobalToLocalMappingApplyIS(pcis->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_dir,&list[1]);CHKERRQ(ierr);
1358       ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&is_B_reuse_mumps);CHKERRQ(ierr);
1359       ierr = ISDestroy(&list[1]);CHKERRQ(ierr);
1360     } else {
1361       ierr = ISDuplicate(sub_schurs->is_Ej_all,&is_B_reuse_mumps);CHKERRQ(ierr);
1362     }
1363     ierr = ISGetLocalSize(is_B_reuse_mumps,&schur_size);CHKERRQ(ierr);
1364     ierr = VecScatterDestroy(&reuse_mumps->correction_scatter_B);CHKERRQ(ierr);
1365     ierr = VecScatterCreate(pcis->vec1_B,is_B_reuse_mumps,reuse_mumps->solB,NULL,&reuse_mumps->correction_scatter_B);CHKERRQ(ierr);
1366     ierr = ISLocalToGlobalMappingApplyIS(sub_schurs->BtoNmap,is_B_reuse_mumps,&tisB_N);CHKERRQ(ierr);
1367     list[0] = pcis->is_I_local;
1368     list[1] = tisB_N;
1369     ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&tisR);CHKERRQ(ierr);
1370     ierr = ISDestroy(&tisB_N);CHKERRQ(ierr);
1371     ierr = ISGetLocalSize(tisR,&n_R);CHKERRQ(ierr);
1372     ierr = ISGetIndices(tisR,&idxs);CHKERRQ(ierr);
1373     ierr = PetscMemcpy(idx_R_local,idxs,n_R*sizeof(PetscInt));CHKERRQ(ierr);
1374     ierr = ISRestoreIndices(tisR,&idxs);CHKERRQ(ierr);
1375     ierr = ISDestroy(&tisR);CHKERRQ(ierr);
1376   }
1377 
1378   /* Block code */
1379   vbs = 1;
1380   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1381   if (bs>1 && !(n_vertices%bs)) {
1382     PetscBool is_blocked = PETSC_TRUE;
1383     PetscInt  *vary;
1384     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1385     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1386     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1387     for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1388     for (i=0; i<n_vertices/bs; i++) {
1389       if (vary[i]!=0 && vary[i]!=bs) {
1390         is_blocked = PETSC_FALSE;
1391         break;
1392       }
1393     }
1394     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1395       vbs = bs;
1396       for (i=0;i<n_R/vbs;i++) {
1397         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1398       }
1399     }
1400     ierr = PetscFree(vary);CHKERRQ(ierr);
1401   }
1402   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1403   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1404 
1405   /* print some info if requested */
1406   if (pcbddc->dbg_flag) {
1407     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1408     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1409     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1410     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1411     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1412     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1413     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1414   }
1415 
1416   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1417   if (!sub_schurs->reuse_mumps) {
1418     IS       is_aux1,is_aux2;
1419     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1420 
1421     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1422     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1423     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1424     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1425     for (i=0; i<n_D; i++) {
1426       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1427     }
1428     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1429     for (i=0, j=0; i<n_R; i++) {
1430       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1431         aux_array1[j++] = i;
1432       }
1433     }
1434     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1435     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1436     for (i=0, j=0; i<n_B; i++) {
1437       if (!PetscBTLookup(bitmask,is_indices[i])) {
1438         aux_array2[j++] = i;
1439       }
1440     }
1441     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1442     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1443     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1444     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1445     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1446 
1447     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1448       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1449       for (i=0, j=0; i<n_R; i++) {
1450         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1451           aux_array1[j++] = i;
1452         }
1453       }
1454       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1455       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1456       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1457     }
1458     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1459     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1460   } else {
1461     IS       tis;
1462     PetscInt schur_size;
1463 
1464     ierr = ISGetLocalSize(is_B_reuse_mumps,&schur_size);CHKERRQ(ierr);
1465     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1466     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,is_B_reuse_mumps,&pcbddc->R_to_B);CHKERRQ(ierr);
1467     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1468     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1469       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1470       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1471       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1472     }
1473   }
1474   ierr = ISDestroy(&is_B_reuse_mumps);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1481 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1482 {
1483   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1484   PC_IS          *pcis = (PC_IS*)pc->data;
1485   PC             pc_temp;
1486   Mat            A_RR;
1487   MatReuse       reuse;
1488   PetscScalar    m_one = -1.0;
1489   PetscReal      value;
1490   PetscInt       n_D,n_R,ibs,mbs;
1491   PetscBool      use_exact,use_exact_reduced,issbaij;
1492   PetscErrorCode ierr;
1493   /* prefixes stuff */
1494   char           dir_prefix[256],neu_prefix[256],str_level[16];
1495   size_t         len;
1496 
1497   PetscFunctionBegin;
1498 
1499   /* compute prefixes */
1500   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1501   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1502   if (!pcbddc->current_level) {
1503     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1504     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1505     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1506     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1507   } else {
1508     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1509     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1510     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1511     len -= 15; /* remove "pc_bddc_coarse_" */
1512     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1513     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1514     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1515     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1516     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1517     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1518     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1519     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1520   }
1521 
1522   /* DIRICHLET PROBLEM */
1523   if (dirichlet) {
1524     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1525     if (pcbddc->issym) {
1526       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1527     }
1528     /* Matrix for Dirichlet problem is pcis->A_II */
1529     n_D = pcis->n - pcis->n_B;
1530     if (!pcbddc->ksp_D) { /* create object if not yet build */
1531       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1532       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1533       /* default */
1534       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1535       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1536       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1537       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1538       if (issbaij) {
1539         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1540       } else {
1541         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1542       }
1543       /* Allow user's customization */
1544       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1545       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1546     }
1547     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1548     if (sub_schurs->reuse_mumps) {
1549       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1550 
1551       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1552     }
1553     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1554     if (!n_D) {
1555       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1556       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1557     }
1558     /* Set Up KSP for Dirichlet problem of BDDC */
1559     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1560     /* set ksp_D into pcis data */
1561     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1562     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1563     pcis->ksp_D = pcbddc->ksp_D;
1564   }
1565 
1566   /* NEUMANN PROBLEM */
1567   A_RR = 0;
1568   if (neumann) {
1569     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1570     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1571     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1572     if (pcbddc->ksp_R) { /* already created ksp */
1573       PetscInt nn_R;
1574       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1575       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1576       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1577       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1578         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1579         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1580         reuse = MAT_INITIAL_MATRIX;
1581       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1582         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1583           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1584           reuse = MAT_INITIAL_MATRIX;
1585         } else { /* safe to reuse the matrix */
1586           reuse = MAT_REUSE_MATRIX;
1587         }
1588       }
1589       /* last check */
1590       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1591         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1592         reuse = MAT_INITIAL_MATRIX;
1593       }
1594     } else { /* first time, so we need to create the matrix */
1595       reuse = MAT_INITIAL_MATRIX;
1596     }
1597     /* extract A_RR */
1598     if (!sub_schurs->reuse_mumps) {
1599       ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1600       ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1601       if (ibs != mbs) {
1602         Mat newmat;
1603         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1604         ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1605         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1606       } else {
1607         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1608       }
1609       if (pcbddc->issym) {
1610         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1611       }
1612     } else {
1613       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1614 
1615       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1616       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1617       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1618     }
1619     if (!pcbddc->ksp_R) { /* create object if not present */
1620       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1621       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1622       /* default */
1623       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1624       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1625       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1626       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1627       if (issbaij) {
1628         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1629       } else {
1630         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1631       }
1632       /* Allow user's customization */
1633       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1634       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1635     }
1636     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1637     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1638     if (!n_R) {
1639       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1640       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1641     }
1642     /* Reuse MUMPS solver if it is present */
1643     if (sub_schurs->reuse_mumps) {
1644       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1645 
1646       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1647     }
1648     /* Set Up KSP for Neumann problem of BDDC */
1649     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1650   }
1651   /* free Neumann problem's matrix */
1652   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1653 
1654   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1655   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1656     if (pcbddc->dbg_flag) {
1657       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1658       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1659       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1660     }
1661     if (dirichlet) { /* Dirichlet */
1662       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1663       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1664       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1665       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1666       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1667       /* need to be adapted? */
1668       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1669       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1670       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1671       /* print info */
1672       if (pcbddc->dbg_flag) {
1673         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1674         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1675       }
1676       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1677         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1678       }
1679     }
1680     if (neumann) { /* Neumann */
1681       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1682       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1683       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1684       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1685       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1686       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1687       /* need to be adapted? */
1688       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1689       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1690       /* print info */
1691       if (pcbddc->dbg_flag) {
1692         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1693         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1694       }
1695       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1696         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1697       }
1698     }
1699   }
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 #undef __FUNCT__
1704 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1705 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1706 {
1707   PetscErrorCode ierr;
1708   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1709 
1710   PetscFunctionBegin;
1711   ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1712   if (!pcbddc->switch_static) {
1713     if (applytranspose && pcbddc->local_auxmat1) {
1714       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1715       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1716     }
1717     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1718     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1719   } else {
1720     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1721     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1722     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1723     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1724     if (applytranspose && pcbddc->local_auxmat1) {
1725       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1726       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1727       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1728       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1729     }
1730   }
1731   if (applytranspose) {
1732     ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1733   } else {
1734     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1735   }
1736   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1737   if (!pcbddc->switch_static) {
1738     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1739     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1740     if (!applytranspose && pcbddc->local_auxmat1) {
1741       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1742       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1743     }
1744   } else {
1745     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1746     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1747     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1748     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1749     if (!applytranspose && pcbddc->local_auxmat1) {
1750       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1751       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1752     }
1753     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1754     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1755     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1756     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1757   }
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1762 #undef __FUNCT__
1763 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1764 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1765 {
1766   PetscErrorCode ierr;
1767   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1768   PC_IS*            pcis = (PC_IS*)  (pc->data);
1769   const PetscScalar zero = 0.0;
1770 
1771   PetscFunctionBegin;
1772   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1773   if (applytranspose) {
1774     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1775     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1776   } else {
1777     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1778     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1779   }
1780   /* start communications from local primal nodes to rhs of coarse solver */
1781   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1782   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1783   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1784 
1785   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1786   /* TODO remove null space when doing multilevel */
1787   if (pcbddc->coarse_ksp) {
1788     Vec rhs,sol;
1789 
1790     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1791     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1792     if (applytranspose) {
1793       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1794     } else {
1795       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1796     }
1797   }
1798 
1799   /* Local solution on R nodes */
1800   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1801     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1802   }
1803 
1804   /* communications from coarse sol to local primal nodes */
1805   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1806   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1807 
1808   /* Sum contributions from two levels */
1809   if (applytranspose) {
1810     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1811     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1812   } else {
1813     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1814     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1815   }
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1821 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1822 {
1823   PetscErrorCode ierr;
1824   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1825   PetscScalar    *array;
1826   Vec            from,to;
1827 
1828   PetscFunctionBegin;
1829   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1830     from = pcbddc->coarse_vec;
1831     to = pcbddc->vec1_P;
1832     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1833       Vec tvec;
1834 
1835       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1836       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1837       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1838       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1839       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1840       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1841     }
1842   } else { /* from local to global -> put data in coarse right hand side */
1843     from = pcbddc->vec1_P;
1844     to = pcbddc->coarse_vec;
1845   }
1846   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1852 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1853 {
1854   PetscErrorCode ierr;
1855   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1856   PetscScalar    *array;
1857   Vec            from,to;
1858 
1859   PetscFunctionBegin;
1860   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1861     from = pcbddc->coarse_vec;
1862     to = pcbddc->vec1_P;
1863   } else { /* from local to global -> put data in coarse right hand side */
1864     from = pcbddc->vec1_P;
1865     to = pcbddc->coarse_vec;
1866   }
1867   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1868   if (smode == SCATTER_FORWARD) {
1869     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1870       Vec tvec;
1871 
1872       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1873       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1874       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1875       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1876     }
1877   } else {
1878     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1879      ierr = VecResetArray(from);CHKERRQ(ierr);
1880     }
1881   }
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 /* uncomment for testing purposes */
1886 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1887 #undef __FUNCT__
1888 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1889 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1890 {
1891   PetscErrorCode    ierr;
1892   PC_IS*            pcis = (PC_IS*)(pc->data);
1893   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1894   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1895   /* one and zero */
1896   PetscScalar       one=1.0,zero=0.0;
1897   /* space to store constraints and their local indices */
1898   PetscScalar       *constraints_data;
1899   PetscInt          *constraints_idxs,*constraints_idxs_B;
1900   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1901   PetscInt          *constraints_n;
1902   /* iterators */
1903   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1904   /* BLAS integers */
1905   PetscBLASInt      lwork,lierr;
1906   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1907   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1908   /* reuse */
1909   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1910   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1911   /* change of basis */
1912   PetscBool         qr_needed;
1913   PetscBT           change_basis,qr_needed_idx;
1914   /* auxiliary stuff */
1915   PetscInt          *nnz,*is_indices;
1916   PetscInt          ncc;
1917   /* some quantities */
1918   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1919   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1920 
1921   PetscFunctionBegin;
1922   /* Destroy Mat objects computed previously */
1923   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1924   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1925   /* save info on constraints from previous setup (if any) */
1926   olocal_primal_size = pcbddc->local_primal_size;
1927   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1928   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
1929   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1930   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1931   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1932   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1933 
1934   /* print some info */
1935   if (pcbddc->dbg_flag) {
1936     IS       vertices;
1937     PetscInt nv,nedges,nfaces;
1938     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1939     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1940     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1941     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1942     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1943     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1944     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1945     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1946     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1947   }
1948 
1949   if (!pcbddc->adaptive_selection) {
1950     IS           ISForVertices,*ISForFaces,*ISForEdges;
1951     MatNullSpace nearnullsp;
1952     const Vec    *nearnullvecs;
1953     Vec          *localnearnullsp;
1954     PetscScalar  *array;
1955     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1956     PetscBool    nnsp_has_cnst;
1957     /* LAPACK working arrays for SVD or POD */
1958     PetscBool    skip_lapack,boolforchange;
1959     PetscScalar  *work;
1960     PetscReal    *singular_vals;
1961 #if defined(PETSC_USE_COMPLEX)
1962     PetscReal    *rwork;
1963 #endif
1964 #if defined(PETSC_MISSING_LAPACK_GESVD)
1965     PetscScalar  *temp_basis,*correlation_mat;
1966 #else
1967     PetscBLASInt dummy_int=1;
1968     PetscScalar  dummy_scalar=1.;
1969 #endif
1970 
1971     /* Get index sets for faces, edges and vertices from graph */
1972     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1973     /* free unneeded index sets */
1974     if (!pcbddc->use_vertices) {
1975       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1976     }
1977     if (!pcbddc->use_edges) {
1978       for (i=0;i<n_ISForEdges;i++) {
1979         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1980       }
1981       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1982       n_ISForEdges = 0;
1983     }
1984     if (!pcbddc->use_faces) {
1985       for (i=0;i<n_ISForFaces;i++) {
1986         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1987       }
1988       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1989       n_ISForFaces = 0;
1990     }
1991 
1992 #if defined(PETSC_USE_DEBUG)
1993     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
1994        Also use_change_of_basis should be consistent among processors */
1995     if (pcbddc->NullSpace) {
1996       PetscBool tbool[2],gbool[2];
1997 
1998       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
1999         pcbddc->use_change_of_basis = PETSC_TRUE;
2000         if (!ISForEdges) {
2001           pcbddc->use_change_on_faces = PETSC_TRUE;
2002         }
2003       }
2004       tbool[0] = pcbddc->use_change_of_basis;
2005       tbool[1] = pcbddc->use_change_on_faces;
2006       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2007       pcbddc->use_change_of_basis = gbool[0];
2008       pcbddc->use_change_on_faces = gbool[1];
2009     }
2010 #endif
2011 
2012     /* check if near null space is attached to global mat */
2013     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2014     if (nearnullsp) {
2015       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2016       /* remove any stored info */
2017       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2018       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2019       /* store information for BDDC solver reuse */
2020       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2021       pcbddc->onearnullspace = nearnullsp;
2022       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2023       for (i=0;i<nnsp_size;i++) {
2024         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2025       }
2026     } else { /* if near null space is not provided BDDC uses constants by default */
2027       nnsp_size = 0;
2028       nnsp_has_cnst = PETSC_TRUE;
2029     }
2030     /* get max number of constraints on a single cc */
2031     max_constraints = nnsp_size;
2032     if (nnsp_has_cnst) max_constraints++;
2033 
2034     /*
2035          Evaluate maximum storage size needed by the procedure
2036          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2037          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2038          There can be multiple constraints per connected component
2039                                                                                                                                                            */
2040     n_vertices = 0;
2041     if (ISForVertices) {
2042       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2043     }
2044     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2045     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2046 
2047     total_counts = n_ISForFaces+n_ISForEdges;
2048     total_counts *= max_constraints;
2049     total_counts += n_vertices;
2050     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2051 
2052     total_counts = 0;
2053     max_size_of_constraint = 0;
2054     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2055       IS used_is;
2056       if (i<n_ISForEdges) {
2057         used_is = ISForEdges[i];
2058       } else {
2059         used_is = ISForFaces[i-n_ISForEdges];
2060       }
2061       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2062       total_counts += j;
2063       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2064     }
2065     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2066 
2067     /* get local part of global near null space vectors */
2068     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2069     for (k=0;k<nnsp_size;k++) {
2070       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2071       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2072       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2073     }
2074 
2075     /* whether or not to skip lapack calls */
2076     skip_lapack = PETSC_TRUE;
2077     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2078 
2079     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2080     if (!skip_lapack) {
2081       PetscScalar temp_work;
2082 
2083 #if defined(PETSC_MISSING_LAPACK_GESVD)
2084       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2085       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2086       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2087       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2088 #if defined(PETSC_USE_COMPLEX)
2089       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2090 #endif
2091       /* now we evaluate the optimal workspace using query with lwork=-1 */
2092       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2093       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2094       lwork = -1;
2095       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2096 #if !defined(PETSC_USE_COMPLEX)
2097       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2098 #else
2099       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2100 #endif
2101       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2102       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2103 #else /* on missing GESVD */
2104       /* SVD */
2105       PetscInt max_n,min_n;
2106       max_n = max_size_of_constraint;
2107       min_n = max_constraints;
2108       if (max_size_of_constraint < max_constraints) {
2109         min_n = max_size_of_constraint;
2110         max_n = max_constraints;
2111       }
2112       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2113 #if defined(PETSC_USE_COMPLEX)
2114       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2115 #endif
2116       /* now we evaluate the optimal workspace using query with lwork=-1 */
2117       lwork = -1;
2118       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2119       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2120       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2121       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2122 #if !defined(PETSC_USE_COMPLEX)
2123       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2124 #else
2125       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2126 #endif
2127       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2128       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2129 #endif /* on missing GESVD */
2130       /* Allocate optimal workspace */
2131       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2132       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2133     }
2134     /* Now we can loop on constraining sets */
2135     total_counts = 0;
2136     constraints_idxs_ptr[0] = 0;
2137     constraints_data_ptr[0] = 0;
2138     /* vertices */
2139     if (n_vertices) {
2140       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2141       if (nnsp_has_cnst) { /* it considers all possible vertices */
2142         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2143         for (i=0;i<n_vertices;i++) {
2144           constraints_n[total_counts] = 1;
2145           constraints_data[total_counts] = 1.0;
2146           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2147           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2148           total_counts++;
2149         }
2150       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2151         PetscBool used_vertex;
2152         for (i=0;i<n_vertices;i++) {
2153           used_vertex = PETSC_FALSE;
2154           k = 0;
2155           while (!used_vertex && k<nnsp_size) {
2156             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2157             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2158               constraints_n[total_counts] = 1;
2159               constraints_idxs[total_counts] = is_indices[i];
2160               constraints_data[total_counts] = 1.0;
2161               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2162               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2163               total_counts++;
2164               used_vertex = PETSC_TRUE;
2165             }
2166             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2167             k++;
2168           }
2169         }
2170       }
2171       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2172       n_vertices = total_counts;
2173     }
2174 
2175     /* edges and faces */
2176     total_counts_cc = total_counts;
2177     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2178       IS        used_is;
2179       PetscBool idxs_copied = PETSC_FALSE;
2180 
2181       if (ncc<n_ISForEdges) {
2182         used_is = ISForEdges[ncc];
2183         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2184       } else {
2185         used_is = ISForFaces[ncc-n_ISForEdges];
2186         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2187       }
2188       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2189 
2190       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2191       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2192       /* change of basis should not be performed on local periodic nodes */
2193       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2194       if (nnsp_has_cnst) {
2195         PetscScalar quad_value;
2196 
2197         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2198         idxs_copied = PETSC_TRUE;
2199 
2200         if (!pcbddc->use_nnsp_true) {
2201           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2202         } else {
2203           quad_value = 1.0;
2204         }
2205         for (j=0;j<size_of_constraint;j++) {
2206           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2207         }
2208         temp_constraints++;
2209         total_counts++;
2210       }
2211       for (k=0;k<nnsp_size;k++) {
2212         PetscReal real_value;
2213         PetscScalar *ptr_to_data;
2214 
2215         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2216         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2217         for (j=0;j<size_of_constraint;j++) {
2218           ptr_to_data[j] = array[is_indices[j]];
2219         }
2220         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2221         /* check if array is null on the connected component */
2222         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2223         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2224         if (real_value > 0.0) { /* keep indices and values */
2225           temp_constraints++;
2226           total_counts++;
2227           if (!idxs_copied) {
2228             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2229             idxs_copied = PETSC_TRUE;
2230           }
2231         }
2232       }
2233       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2234       valid_constraints = temp_constraints;
2235       if (!pcbddc->use_nnsp_true && temp_constraints) {
2236         if (temp_constraints == 1) { /* just normalize the constraint */
2237           PetscScalar norm,*ptr_to_data;
2238 
2239           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2240           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2241           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2242           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2243           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2244         } else { /* perform SVD */
2245           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2246           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2247 
2248 #if defined(PETSC_MISSING_LAPACK_GESVD)
2249           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2250              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2251              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2252                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2253                 from that computed using LAPACKgesvd
2254              -> This is due to a different computation of eigenvectors in LAPACKheev
2255              -> The quality of the POD-computed basis will be the same */
2256           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2257           /* Store upper triangular part of correlation matrix */
2258           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2259           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2260           for (j=0;j<temp_constraints;j++) {
2261             for (k=0;k<j+1;k++) {
2262               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2263             }
2264           }
2265           /* compute eigenvalues and eigenvectors of correlation matrix */
2266           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2267           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2268 #if !defined(PETSC_USE_COMPLEX)
2269           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2270 #else
2271           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2272 #endif
2273           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2274           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2275           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2276           j = 0;
2277           while (j < temp_constraints && singular_vals[j] < tol) j++;
2278           total_counts = total_counts-j;
2279           valid_constraints = temp_constraints-j;
2280           /* scale and copy POD basis into used quadrature memory */
2281           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2282           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2283           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2284           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2285           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2286           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2287           if (j<temp_constraints) {
2288             PetscInt ii;
2289             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2290             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2291             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2292             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2293             for (k=0;k<temp_constraints-j;k++) {
2294               for (ii=0;ii<size_of_constraint;ii++) {
2295                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2296               }
2297             }
2298           }
2299 #else  /* on missing GESVD */
2300           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2301           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2302           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2303           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2304 #if !defined(PETSC_USE_COMPLEX)
2305           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2306 #else
2307           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2308 #endif
2309           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2310           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2311           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2312           k = temp_constraints;
2313           if (k > size_of_constraint) k = size_of_constraint;
2314           j = 0;
2315           while (j < k && singular_vals[k-j-1] < tol) j++;
2316           valid_constraints = k-j;
2317           total_counts = total_counts-temp_constraints+valid_constraints;
2318 #endif /* on missing GESVD */
2319         }
2320       }
2321       /* update pointers information */
2322       if (valid_constraints) {
2323         constraints_n[total_counts_cc] = valid_constraints;
2324         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2325         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2326         /* set change_of_basis flag */
2327         if (boolforchange) {
2328           PetscBTSet(change_basis,total_counts_cc);
2329         }
2330         total_counts_cc++;
2331       }
2332     }
2333     /* free workspace */
2334     if (!skip_lapack) {
2335       ierr = PetscFree(work);CHKERRQ(ierr);
2336 #if defined(PETSC_USE_COMPLEX)
2337       ierr = PetscFree(rwork);CHKERRQ(ierr);
2338 #endif
2339       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2340 #if defined(PETSC_MISSING_LAPACK_GESVD)
2341       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2342       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2343 #endif
2344     }
2345     for (k=0;k<nnsp_size;k++) {
2346       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2347     }
2348     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2349     /* free index sets of faces, edges and vertices */
2350     for (i=0;i<n_ISForFaces;i++) {
2351       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2352     }
2353     if (n_ISForFaces) {
2354       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2355     }
2356     for (i=0;i<n_ISForEdges;i++) {
2357       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2358     }
2359     if (n_ISForEdges) {
2360       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2361     }
2362     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2363   } else {
2364     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2365 
2366     total_counts = 0;
2367     n_vertices = 0;
2368     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2369       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2370     }
2371     max_constraints = 0;
2372     total_counts_cc = 0;
2373     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2374       total_counts += pcbddc->adaptive_constraints_n[i];
2375       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2376       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2377     }
2378     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2379     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2380     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2381     constraints_data = pcbddc->adaptive_constraints_data;
2382     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2383     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2384     total_counts_cc = 0;
2385     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2386       if (pcbddc->adaptive_constraints_n[i]) {
2387         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2388       }
2389     }
2390 #if 0
2391     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2392     for (i=0;i<total_counts_cc;i++) {
2393       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2394       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2395       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2396         printf(" %d",constraints_idxs[j]);
2397       }
2398       printf("\n");
2399       printf("number of cc: %d\n",constraints_n[i]);
2400     }
2401     for (i=0;i<n_vertices;i++) {
2402       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2403     }
2404     for (i=0;i<sub_schurs->n_subs;i++) {
2405       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2406     }
2407 #endif
2408 
2409     max_size_of_constraint = 0;
2410     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2411     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2412     /* Change of basis */
2413     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2414     if (pcbddc->use_change_of_basis) {
2415       for (i=0;i<sub_schurs->n_subs;i++) {
2416         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2417           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2418         }
2419       }
2420     }
2421   }
2422   pcbddc->local_primal_size = total_counts;
2423   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2424 
2425   /* map constraints_idxs in boundary numbering */
2426   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2427   if (i != constraints_idxs_ptr[total_counts_cc]) {
2428     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2429   }
2430 
2431   /* Create constraint matrix */
2432   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2433   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2434   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2435 
2436   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2437   /* determine if a QR strategy is needed for change of basis */
2438   qr_needed = PETSC_FALSE;
2439   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2440   total_primal_vertices=0;
2441   pcbddc->local_primal_size_cc = 0;
2442   for (i=0;i<total_counts_cc;i++) {
2443     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2444     if (size_of_constraint == 1) {
2445       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2446       pcbddc->local_primal_size_cc += 1;
2447     } else if (PetscBTLookup(change_basis,i)) {
2448       for (k=0;k<constraints_n[i];k++) {
2449         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2450       }
2451       pcbddc->local_primal_size_cc += constraints_n[i];
2452       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2453         PetscBTSet(qr_needed_idx,i);
2454         qr_needed = PETSC_TRUE;
2455       }
2456     } else {
2457       pcbddc->local_primal_size_cc += 1;
2458     }
2459   }
2460   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2461   pcbddc->n_vertices = total_primal_vertices;
2462   /* permute indices in order to have a sorted set of vertices */
2463   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2464 
2465   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2466   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2467   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2468 
2469   /* nonzero structure of constraint matrix */
2470   /* and get reference dof for local constraints */
2471   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2472   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2473 
2474   j = total_primal_vertices;
2475   total_counts = total_primal_vertices;
2476   cum = total_primal_vertices;
2477   for (i=n_vertices;i<total_counts_cc;i++) {
2478     if (!PetscBTLookup(change_basis,i)) {
2479       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2480       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2481       cum++;
2482       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2483       for (k=0;k<constraints_n[i];k++) {
2484         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2485         nnz[j+k] = size_of_constraint;
2486       }
2487       j += constraints_n[i];
2488     }
2489   }
2490   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2491   ierr = PetscFree(nnz);CHKERRQ(ierr);
2492 
2493   /* set values in constraint matrix */
2494   for (i=0;i<total_primal_vertices;i++) {
2495     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2496   }
2497   total_counts = total_primal_vertices;
2498   for (i=n_vertices;i<total_counts_cc;i++) {
2499     if (!PetscBTLookup(change_basis,i)) {
2500       PetscInt *cols;
2501 
2502       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2503       cols = constraints_idxs+constraints_idxs_ptr[i];
2504       for (k=0;k<constraints_n[i];k++) {
2505         PetscInt    row = total_counts+k;
2506         PetscScalar *vals;
2507 
2508         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2509         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2510       }
2511       total_counts += constraints_n[i];
2512     }
2513   }
2514   /* assembling */
2515   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2516   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2517 
2518   /*
2519   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2520   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2521   */
2522   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2523   if (pcbddc->use_change_of_basis) {
2524     /* dual and primal dofs on a single cc */
2525     PetscInt     dual_dofs,primal_dofs;
2526     /* working stuff for GEQRF */
2527     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2528     PetscBLASInt lqr_work;
2529     /* working stuff for UNGQR */
2530     PetscScalar  *gqr_work,lgqr_work_t;
2531     PetscBLASInt lgqr_work;
2532     /* working stuff for TRTRS */
2533     PetscScalar  *trs_rhs;
2534     PetscBLASInt Blas_NRHS;
2535     /* pointers for values insertion into change of basis matrix */
2536     PetscInt     *start_rows,*start_cols;
2537     PetscScalar  *start_vals;
2538     /* working stuff for values insertion */
2539     PetscBT      is_primal;
2540     PetscInt     *aux_primal_numbering_B;
2541     /* matrix sizes */
2542     PetscInt     global_size,local_size;
2543     /* temporary change of basis */
2544     Mat          localChangeOfBasisMatrix;
2545     /* extra space for debugging */
2546     PetscScalar  *dbg_work;
2547 
2548     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2549     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2550     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2551     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2552     /* nonzeros for local mat */
2553     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2554     for (i=0;i<pcis->n;i++) nnz[i]=1;
2555     for (i=n_vertices;i<total_counts_cc;i++) {
2556       if (PetscBTLookup(change_basis,i)) {
2557         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2558         if (PetscBTLookup(qr_needed_idx,i)) {
2559           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2560         } else {
2561           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2562           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2563         }
2564       }
2565     }
2566     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2567     ierr = PetscFree(nnz);CHKERRQ(ierr);
2568     /* Set initial identity in the matrix */
2569     for (i=0;i<pcis->n;i++) {
2570       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2571     }
2572 
2573     if (pcbddc->dbg_flag) {
2574       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2575       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2576     }
2577 
2578 
2579     /* Now we loop on the constraints which need a change of basis */
2580     /*
2581        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2582        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2583 
2584        Basic blocks of change of basis matrix T computed by
2585 
2586           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2587 
2588             | 1        0   ...        0         s_1/S |
2589             | 0        1   ...        0         s_2/S |
2590             |              ...                        |
2591             | 0        ...            1     s_{n-1}/S |
2592             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2593 
2594             with S = \sum_{i=1}^n s_i^2
2595             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2596                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2597 
2598           - QR decomposition of constraints otherwise
2599     */
2600     if (qr_needed) {
2601       /* space to store Q */
2602       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2603       /* first we issue queries for optimal work */
2604       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2605       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2606       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2607       lqr_work = -1;
2608       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2609       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2610       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2611       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2612       lgqr_work = -1;
2613       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2614       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2615       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2616       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2617       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2618       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2619       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2620       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2621       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2622       /* array to store scaling factors for reflectors */
2623       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2624       /* array to store rhs and solution of triangular solver */
2625       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2626       /* allocating workspace for check */
2627       if (pcbddc->dbg_flag) {
2628         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2629       }
2630     }
2631     /* array to store whether a node is primal or not */
2632     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2633     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2634     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2635     if (i != total_primal_vertices) {
2636       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2637     }
2638     for (i=0;i<total_primal_vertices;i++) {
2639       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2640     }
2641     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2642 
2643     /* loop on constraints and see whether or not they need a change of basis and compute it */
2644     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2645       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2646       if (PetscBTLookup(change_basis,total_counts)) {
2647         /* get constraint info */
2648         primal_dofs = constraints_n[total_counts];
2649         dual_dofs = size_of_constraint-primal_dofs;
2650 
2651         if (pcbddc->dbg_flag) {
2652           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2653         }
2654 
2655         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2656 
2657           /* copy quadrature constraints for change of basis check */
2658           if (pcbddc->dbg_flag) {
2659             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2660           }
2661           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2662           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2663 
2664           /* compute QR decomposition of constraints */
2665           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2666           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2667           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2668           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2669           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2670           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2671           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2672 
2673           /* explictly compute R^-T */
2674           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2675           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2676           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2677           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2678           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2679           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2680           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2681           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2682           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2683           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2684 
2685           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2686           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2687           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2688           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2689           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2690           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2691           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2692           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2693           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2694 
2695           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2696              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2697              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2698           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2699           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2700           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2701           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2702           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2703           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2704           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2705           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2706           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2707           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2708 
2709           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2710           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2711           /* insert cols for primal dofs */
2712           for (j=0;j<primal_dofs;j++) {
2713             start_vals = &qr_basis[j*size_of_constraint];
2714             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2715             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2716           }
2717           /* insert cols for dual dofs */
2718           for (j=0,k=0;j<dual_dofs;k++) {
2719             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2720               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2721               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2722               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2723               j++;
2724             }
2725           }
2726 
2727           /* check change of basis */
2728           if (pcbddc->dbg_flag) {
2729             PetscInt   ii,jj;
2730             PetscBool valid_qr=PETSC_TRUE;
2731             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2732             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2733             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2734             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2735             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2736             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2737             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2738             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2739             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2740             for (jj=0;jj<size_of_constraint;jj++) {
2741               for (ii=0;ii<primal_dofs;ii++) {
2742                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2743                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2744               }
2745             }
2746             if (!valid_qr) {
2747               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2748               for (jj=0;jj<size_of_constraint;jj++) {
2749                 for (ii=0;ii<primal_dofs;ii++) {
2750                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2751                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2752                   }
2753                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2754                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2755                   }
2756                 }
2757               }
2758             } else {
2759               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2760             }
2761           }
2762         } else { /* simple transformation block */
2763           PetscInt    row,col;
2764           PetscScalar val,norm;
2765 
2766           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2767           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2768           for (j=0;j<size_of_constraint;j++) {
2769             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2770             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2771             if (!PetscBTLookup(is_primal,row_B)) {
2772               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2773               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2774               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2775             } else {
2776               for (k=0;k<size_of_constraint;k++) {
2777                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2778                 if (row != col) {
2779                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2780                 } else {
2781                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2782                 }
2783                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2784               }
2785             }
2786           }
2787           if (pcbddc->dbg_flag) {
2788             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2789           }
2790         }
2791       } else {
2792         if (pcbddc->dbg_flag) {
2793           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2794         }
2795       }
2796     }
2797 
2798     /* free workspace */
2799     if (qr_needed) {
2800       if (pcbddc->dbg_flag) {
2801         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2802       }
2803       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2804       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2805       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2806       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2807       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2808     }
2809     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2810     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2811     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2812 
2813     /* assembling of global change of variable */
2814     {
2815       Mat      tmat;
2816       PetscInt bs;
2817 
2818       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2819       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2820       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2821       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2822       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2823       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2824       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2825       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2826       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2827       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2828       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2829       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2830       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2831       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2832       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2833       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2834       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2835       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2836     }
2837     /* check */
2838     if (pcbddc->dbg_flag) {
2839       PetscReal error;
2840       Vec       x,x_change;
2841 
2842       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2843       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2844       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2845       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2846       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2847       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2848       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2849       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2850       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2851       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2852       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2853       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2854       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2855       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2856       ierr = VecDestroy(&x);CHKERRQ(ierr);
2857       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2858     }
2859 
2860     /* adapt sub_schurs computed (if any) */
2861     if (pcbddc->use_deluxe_scaling) {
2862       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2863       if (sub_schurs->S_Ej_all) {
2864         Mat S_new,tmat;
2865         IS is_all_N;
2866 
2867         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2868         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2869         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2870         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2871         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2872         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2873         sub_schurs->S_Ej_all = S_new;
2874         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2875         if (sub_schurs->sum_S_Ej_all) {
2876           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2877           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2878           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2879           sub_schurs->sum_S_Ej_all = S_new;
2880           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2881         }
2882         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2883       }
2884     }
2885     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2886   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2887     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2888     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2889   }
2890 
2891   /* set up change of basis context */
2892   if (pcbddc->ChangeOfBasisMatrix) {
2893     PCBDDCChange_ctx change_ctx;
2894 
2895     if (!pcbddc->new_global_mat) {
2896       PetscInt global_size,local_size;
2897 
2898       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2899       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2900       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2901       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2902       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2903       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2904       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2905       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2906       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2907     } else {
2908       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2909       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2910       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2911     }
2912     if (!pcbddc->user_ChangeOfBasisMatrix) {
2913       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2914       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2915     } else {
2916       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2917       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2918     }
2919     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2920     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2921     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2922     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2923   }
2924 
2925   /* check if a new primal space has been introduced */
2926   pcbddc->new_primal_space_local = PETSC_TRUE;
2927   if (olocal_primal_size == pcbddc->local_primal_size) {
2928     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2929     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2930     if (!pcbddc->new_primal_space_local) {
2931       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2932       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2933     }
2934   }
2935   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
2936   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2937   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2938 
2939   /* flush dbg viewer */
2940   if (pcbddc->dbg_flag) {
2941     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2942   }
2943 
2944   /* free workspace */
2945   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2946   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2947   if (!pcbddc->adaptive_selection) {
2948     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2949     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2950   } else {
2951     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2952                       pcbddc->adaptive_constraints_idxs_ptr,
2953                       pcbddc->adaptive_constraints_data_ptr,
2954                       pcbddc->adaptive_constraints_idxs,
2955                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2956     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
2957     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
2958   }
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 #undef __FUNCT__
2963 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2964 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2965 {
2966   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2967   PC_IS       *pcis = (PC_IS*)pc->data;
2968   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2969   PetscInt    ierr,i,vertex_size,N;
2970   PetscViewer viewer=pcbddc->dbg_viewer;
2971 
2972   PetscFunctionBegin;
2973   /* Reset previously computed graph */
2974   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2975   /* Init local Graph struct */
2976   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2977   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
2978 
2979   /* Check validity of the csr graph passed in by the user */
2980   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2981     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2982   }
2983 
2984   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2985   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2986     PetscInt  *xadj,*adjncy;
2987     PetscInt  nvtxs;
2988     PetscBool flg_row=PETSC_FALSE;
2989 
2990     if (pcbddc->use_local_adj) {
2991 
2992       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2993       if (flg_row) {
2994         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2995         pcbddc->computed_rowadj = PETSC_TRUE;
2996       }
2997       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2998     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
2999       IS                     is_dummy;
3000       ISLocalToGlobalMapping l2gmap_dummy;
3001       PetscInt               j,sum;
3002       PetscInt               *cxadj,*cadjncy;
3003       const PetscInt         *idxs;
3004       PCBDDCGraph            graph;
3005       PetscBT                is_on_boundary;
3006 
3007       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3008       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3009       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3010       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3011       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3012       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3013       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3014       if (flg_row) {
3015         graph->xadj = xadj;
3016         graph->adjncy = adjncy;
3017       }
3018       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3019       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3020       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3021 
3022       if (pcbddc->dbg_flag) {
3023         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3024         for (i=0;i<graph->ncc;i++) {
3025           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3026         }
3027       }
3028 
3029       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3030       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3031       for (i=0;i<pcis->n_B;i++) {
3032         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3033       }
3034       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3035 
3036       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3037       sum = 0;
3038       for (i=0;i<graph->ncc;i++) {
3039         PetscInt sizecc = 0;
3040         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3041           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3042             sizecc++;
3043           }
3044         }
3045         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3046           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3047             cxadj[graph->queue[j]] = sizecc;
3048           }
3049         }
3050         sum += sizecc*sizecc;
3051       }
3052       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3053       sum = 0;
3054       for (i=0;i<pcis->n;i++) {
3055         PetscInt temp = cxadj[i];
3056         cxadj[i] = sum;
3057         sum += temp;
3058       }
3059       cxadj[pcis->n] = sum;
3060       for (i=0;i<graph->ncc;i++) {
3061         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3062           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3063             PetscInt k,sizecc = 0;
3064             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3065               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3066                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3067                 sizecc++;
3068               }
3069             }
3070           }
3071         }
3072       }
3073       if (sum) {
3074         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3075       } else {
3076         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3077         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3078       }
3079       graph->xadj = 0;
3080       graph->adjncy = 0;
3081       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3082       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3083     }
3084   }
3085   if (pcbddc->dbg_flag) {
3086     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3087   }
3088 
3089   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3090   vertex_size = 1;
3091   if (pcbddc->user_provided_isfordofs) {
3092     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3093       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3094       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3095         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3096         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3097       }
3098       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3099       pcbddc->n_ISForDofs = 0;
3100       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3101     }
3102     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3103     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3104   } else {
3105     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3106       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3107       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3108       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3109         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3110       }
3111     }
3112   }
3113 
3114   /* Setup of Graph */
3115   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3116     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3117   }
3118   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3119     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3120   }
3121   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3122 
3123   /* Graph's connected components analysis */
3124   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3125 
3126   /* print some info to stdout */
3127   if (pcbddc->dbg_flag) {
3128     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3129   }
3130 
3131   /* mark topography has done */
3132   pcbddc->recompute_topography = PETSC_FALSE;
3133   PetscFunctionReturn(0);
3134 }
3135 
3136 #undef __FUNCT__
3137 #define __FUNCT__ "PCBDDCSubsetNumbering"
3138 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3139 {
3140   Vec            local_vec,global_vec;
3141   IS             seqis,paris;
3142   VecScatter     scatter_ctx;
3143   PetscScalar    *array;
3144   PetscInt       *temp_global_dofs;
3145   PetscScalar    globalsum;
3146   PetscInt       i,j,s;
3147   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3148   PetscMPIInt    rank_prec_comm,size_prec_comm;
3149   PetscInt       *dof_sizes,*dof_displs;
3150   PetscBool      first_found;
3151   PetscErrorCode ierr;
3152 
3153   PetscFunctionBegin;
3154   /* mpi buffers */
3155   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3156   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3157   j = ( !rank_prec_comm ? size_prec_comm : 0);
3158   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3159   /* get maximum size of subset */
3160   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3161   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3162   max_local = 0;
3163   for (i=0;i<n_local_dofs;i++) {
3164     if (max_local < temp_global_dofs[i] ) {
3165       max_local = temp_global_dofs[i];
3166     }
3167   }
3168   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3169   max_global++;
3170   max_local = 0;
3171   for (i=0;i<n_local_dofs;i++) {
3172     if (max_local < local_dofs[i] ) {
3173       max_local = local_dofs[i];
3174     }
3175   }
3176   max_local++;
3177   /* allocate workspace */
3178   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3179   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3180   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3181   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3182   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3183   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3184   /* create scatter */
3185   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3186   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3187   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3188   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3189   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3190   /* init array */
3191   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3192   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3193   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3194   if (local_dofs_mult) {
3195     for (i=0;i<n_local_dofs;i++) {
3196       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3197     }
3198   } else {
3199     for (i=0;i<n_local_dofs;i++) {
3200       array[local_dofs[i]]=1.0;
3201     }
3202   }
3203   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3204   /* scatter into global vec and get total number of global dofs */
3205   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3206   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3207   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3208   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3209   /* Fill global_vec with cumulative function for global numbering */
3210   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3211   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3212   nlocals = 0;
3213   first_index = -1;
3214   first_found = PETSC_FALSE;
3215   for (i=0;i<s;i++) {
3216     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3217       first_found = PETSC_TRUE;
3218       first_index = i;
3219     }
3220     nlocals += (PetscInt)PetscRealPart(array[i]);
3221   }
3222   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3223   if (!rank_prec_comm) {
3224     dof_displs[0]=0;
3225     for (i=1;i<size_prec_comm;i++) {
3226       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3227     }
3228   }
3229   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3230   if (first_found) {
3231     array[first_index] += (PetscScalar)nlocals;
3232     old_index = first_index;
3233     for (i=first_index+1;i<s;i++) {
3234       if (PetscRealPart(array[i]) > 0.1) {
3235         array[i] += array[old_index];
3236         old_index = i;
3237       }
3238     }
3239   }
3240   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3241   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3242   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3243   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3244   /* get global ordering of local dofs */
3245   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3246   if (local_dofs_mult) {
3247     for (i=0;i<n_local_dofs;i++) {
3248       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3249     }
3250   } else {
3251     for (i=0;i<n_local_dofs;i++) {
3252       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3253     }
3254   }
3255   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3256   /* free workspace */
3257   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3258   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3259   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3260   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3261   /* return pointer to global ordering of local dofs */
3262   *global_numbering_subset = temp_global_dofs;
3263   PetscFunctionReturn(0);
3264 }
3265 
3266 #undef __FUNCT__
3267 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3268 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3269 {
3270   PetscInt       i,j;
3271   PetscScalar    *alphas;
3272   PetscErrorCode ierr;
3273 
3274   PetscFunctionBegin;
3275   /* this implements stabilized Gram-Schmidt */
3276   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3277   for (i=0;i<n;i++) {
3278     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3279     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3280     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3281   }
3282   ierr = PetscFree(alphas);CHKERRQ(ierr);
3283   PetscFunctionReturn(0);
3284 }
3285 
3286 #undef __FUNCT__
3287 #define __FUNCT__ "MatISGetSubassemblingPattern"
3288 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3289 {
3290   Mat             subdomain_adj;
3291   IS              new_ranks,ranks_send_to;
3292   MatPartitioning partitioner;
3293   Mat_IS          *matis;
3294   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3295   PetscInt        prank;
3296   PetscMPIInt     size,rank,color;
3297   PetscInt        *xadj,*adjncy,*oldranks;
3298   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3299   PetscInt        i,local_size,threshold=0;
3300   PetscErrorCode  ierr;
3301   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3302   PetscSubcomm    subcomm;
3303 
3304   PetscFunctionBegin;
3305   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3306   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3307   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3308 
3309   /* Get info on mapping */
3310   matis = (Mat_IS*)(mat->data);
3311   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3312   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3313 
3314   /* build local CSR graph of subdomains' connectivity */
3315   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3316   xadj[0] = 0;
3317   xadj[1] = PetscMax(n_neighs-1,0);
3318   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3319   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3320 
3321   if (threshold) {
3322     PetscInt xadj_count = 0;
3323     for (i=1;i<n_neighs;i++) {
3324       if (n_shared[i] > threshold) {
3325         adjncy[xadj_count] = neighs[i];
3326         adjncy_wgt[xadj_count] = n_shared[i];
3327         xadj_count++;
3328       }
3329     }
3330     xadj[1] = xadj_count;
3331   } else {
3332     if (xadj[1]) {
3333       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3334       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3335     }
3336   }
3337   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3338   if (use_square) {
3339     for (i=0;i<xadj[1];i++) {
3340       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3341     }
3342   }
3343   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3344 
3345   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3346 
3347   /*
3348     Restrict work on active processes only.
3349   */
3350   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3351   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3352   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3353   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3354   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3355   if (color) {
3356     ierr = PetscFree(xadj);CHKERRQ(ierr);
3357     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3358     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3359   } else {
3360     PetscInt coarsening_ratio;
3361     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3362     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3363     prank = rank;
3364     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3365     /*
3366     for (i=0;i<size;i++) {
3367       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3368     }
3369     */
3370     for (i=0;i<xadj[1];i++) {
3371       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3372     }
3373     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3374     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3375     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3376 
3377     /* Partition */
3378     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3379     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3380     if (use_vwgt) {
3381       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3382       v_wgt[0] = local_size;
3383       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3384     }
3385     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3386     coarsening_ratio = size/n_subdomains;
3387     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3388     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3389     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3390     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3391 
3392     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3393     if (contiguous) {
3394       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3395     } else {
3396       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3397     }
3398     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3399     /* clean up */
3400     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3401     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3402     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3403     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3404   }
3405   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3406 
3407   /* assemble parallel IS for sends */
3408   i = 1;
3409   if (color) i=0;
3410   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3411 
3412   /* get back IS */
3413   *is_sends = ranks_send_to;
3414   PetscFunctionReturn(0);
3415 }
3416 
3417 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3418 
3419 #undef __FUNCT__
3420 #define __FUNCT__ "MatISSubassemble"
3421 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3422 {
3423   Mat                    local_mat;
3424   Mat_IS                 *matis;
3425   IS                     is_sends_internal;
3426   PetscInt               rows,cols,new_local_rows;
3427   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3428   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3429   ISLocalToGlobalMapping l2gmap;
3430   PetscInt*              l2gmap_indices;
3431   const PetscInt*        is_indices;
3432   MatType                new_local_type;
3433   /* buffers */
3434   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3435   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3436   PetscInt               *recv_buffer_idxs_local;
3437   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3438   /* MPI */
3439   MPI_Comm               comm,comm_n;
3440   PetscSubcomm           subcomm;
3441   PetscMPIInt            n_sends,n_recvs,commsize;
3442   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3443   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3444   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3445   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3446   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3447   PetscErrorCode         ierr;
3448 
3449   PetscFunctionBegin;
3450   /* TODO: add missing checks */
3451   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3452   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3453   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3454   PetscValidLogicalCollectiveInt(mat,nis,7);
3455   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3456   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3457   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3458   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3459   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3460   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3461   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3462   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3463     PetscInt mrows,mcols,mnrows,mncols;
3464     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3465     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3466     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3467     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3468     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3469     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3470   }
3471   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3472   PetscValidLogicalCollectiveInt(mat,bs,0);
3473   /* prepare IS for sending if not provided */
3474   if (!is_sends) {
3475     PetscBool pcontig = PETSC_TRUE;
3476     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3477     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3478   } else {
3479     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3480     is_sends_internal = is_sends;
3481   }
3482 
3483   /* get pointer of MATIS data */
3484   matis = (Mat_IS*)mat->data;
3485 
3486   /* get comm */
3487   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3488 
3489   /* compute number of sends */
3490   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3491   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3492 
3493   /* compute number of receives */
3494   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3495   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3496   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3497   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3498   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3499   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3500   ierr = PetscFree(iflags);CHKERRQ(ierr);
3501 
3502   /* restrict comm if requested */
3503   subcomm = 0;
3504   destroy_mat = PETSC_FALSE;
3505   if (restrict_comm) {
3506     PetscMPIInt color,subcommsize;
3507 
3508     color = 0;
3509     if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */
3510     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3511     subcommsize = commsize - subcommsize;
3512     /* check if reuse has been requested */
3513     if (reuse == MAT_REUSE_MATRIX) {
3514       if (*mat_n) {
3515         PetscMPIInt subcommsize2;
3516         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3517         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3518         comm_n = PetscObjectComm((PetscObject)*mat_n);
3519       } else {
3520         comm_n = PETSC_COMM_SELF;
3521       }
3522     } else { /* MAT_INITIAL_MATRIX */
3523       PetscMPIInt rank;
3524 
3525       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3526       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3527       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3528       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3529       comm_n = PetscSubcommChild(subcomm);
3530     }
3531     /* flag to destroy *mat_n if not significative */
3532     if (color) destroy_mat = PETSC_TRUE;
3533   } else {
3534     comm_n = comm;
3535   }
3536 
3537   /* prepare send/receive buffers */
3538   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3539   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3540   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3541   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3542   if (nis) {
3543     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3544   }
3545 
3546   /* Get data from local matrices */
3547   if (!isdense) {
3548     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3549     /* TODO: See below some guidelines on how to prepare the local buffers */
3550     /*
3551        send_buffer_vals should contain the raw values of the local matrix
3552        send_buffer_idxs should contain:
3553        - MatType_PRIVATE type
3554        - PetscInt        size_of_l2gmap
3555        - PetscInt        global_row_indices[size_of_l2gmap]
3556        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3557     */
3558   } else {
3559     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3560     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3561     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3562     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3563     send_buffer_idxs[1] = i;
3564     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3565     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3566     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3567     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3568     for (i=0;i<n_sends;i++) {
3569       ilengths_vals[is_indices[i]] = len*len;
3570       ilengths_idxs[is_indices[i]] = len+2;
3571     }
3572   }
3573   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3574   /* additional is (if any) */
3575   if (nis) {
3576     PetscMPIInt psum;
3577     PetscInt j;
3578     for (j=0,psum=0;j<nis;j++) {
3579       PetscInt plen;
3580       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3581       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3582       psum += len+1; /* indices + lenght */
3583     }
3584     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3585     for (j=0,psum=0;j<nis;j++) {
3586       PetscInt plen;
3587       const PetscInt *is_array_idxs;
3588       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3589       send_buffer_idxs_is[psum] = plen;
3590       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3591       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3592       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3593       psum += plen+1; /* indices + lenght */
3594     }
3595     for (i=0;i<n_sends;i++) {
3596       ilengths_idxs_is[is_indices[i]] = psum;
3597     }
3598     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3599   }
3600 
3601   buf_size_idxs = 0;
3602   buf_size_vals = 0;
3603   buf_size_idxs_is = 0;
3604   for (i=0;i<n_recvs;i++) {
3605     buf_size_idxs += (PetscInt)olengths_idxs[i];
3606     buf_size_vals += (PetscInt)olengths_vals[i];
3607     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3608   }
3609   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3610   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3611   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3612 
3613   /* get new tags for clean communications */
3614   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3615   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3616   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3617 
3618   /* allocate for requests */
3619   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3620   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3621   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3622   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3623   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3624   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3625 
3626   /* communications */
3627   ptr_idxs = recv_buffer_idxs;
3628   ptr_vals = recv_buffer_vals;
3629   ptr_idxs_is = recv_buffer_idxs_is;
3630   for (i=0;i<n_recvs;i++) {
3631     source_dest = onodes[i];
3632     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3633     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3634     ptr_idxs += olengths_idxs[i];
3635     ptr_vals += olengths_vals[i];
3636     if (nis) {
3637       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3638       ptr_idxs_is += olengths_idxs_is[i];
3639     }
3640   }
3641   for (i=0;i<n_sends;i++) {
3642     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3643     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3644     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3645     if (nis) {
3646       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3647     }
3648   }
3649   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3650   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3651 
3652   /* assemble new l2g map */
3653   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3654   ptr_idxs = recv_buffer_idxs;
3655   new_local_rows = 0;
3656   for (i=0;i<n_recvs;i++) {
3657     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3658     ptr_idxs += olengths_idxs[i];
3659   }
3660   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3661   ptr_idxs = recv_buffer_idxs;
3662   new_local_rows = 0;
3663   for (i=0;i<n_recvs;i++) {
3664     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3665     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3666     ptr_idxs += olengths_idxs[i];
3667   }
3668   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3669   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3670   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3671 
3672   /* infer new local matrix type from received local matrices type */
3673   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3674   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3675   if (n_recvs) {
3676     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3677     ptr_idxs = recv_buffer_idxs;
3678     for (i=0;i<n_recvs;i++) {
3679       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3680         new_local_type_private = MATAIJ_PRIVATE;
3681         break;
3682       }
3683       ptr_idxs += olengths_idxs[i];
3684     }
3685     switch (new_local_type_private) {
3686       case MATDENSE_PRIVATE:
3687         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3688           new_local_type = MATSEQAIJ;
3689           bs = 1;
3690         } else { /* if I receive only 1 dense matrix */
3691           new_local_type = MATSEQDENSE;
3692           bs = 1;
3693         }
3694         break;
3695       case MATAIJ_PRIVATE:
3696         new_local_type = MATSEQAIJ;
3697         bs = 1;
3698         break;
3699       case MATBAIJ_PRIVATE:
3700         new_local_type = MATSEQBAIJ;
3701         break;
3702       case MATSBAIJ_PRIVATE:
3703         new_local_type = MATSEQSBAIJ;
3704         break;
3705       default:
3706         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3707         break;
3708     }
3709   } else { /* by default, new_local_type is seqdense */
3710     new_local_type = MATSEQDENSE;
3711     bs = 1;
3712   }
3713 
3714   /* create MATIS object if needed */
3715   if (reuse == MAT_INITIAL_MATRIX) {
3716     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3717     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3718   } else {
3719     /* it also destroys the local matrices */
3720     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3721   }
3722   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3723   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3724 
3725   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3726 
3727   /* Global to local map of received indices */
3728   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3729   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3730   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3731 
3732   /* restore attributes -> type of incoming data and its size */
3733   buf_size_idxs = 0;
3734   for (i=0;i<n_recvs;i++) {
3735     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3736     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3737     buf_size_idxs += (PetscInt)olengths_idxs[i];
3738   }
3739   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3740 
3741   /* set preallocation */
3742   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3743   if (!newisdense) {
3744     PetscInt *new_local_nnz=0;
3745 
3746     ptr_vals = recv_buffer_vals;
3747     ptr_idxs = recv_buffer_idxs_local;
3748     if (n_recvs) {
3749       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3750     }
3751     for (i=0;i<n_recvs;i++) {
3752       PetscInt j;
3753       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3754         for (j=0;j<*(ptr_idxs+1);j++) {
3755           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3756         }
3757       } else {
3758         /* TODO */
3759       }
3760       ptr_idxs += olengths_idxs[i];
3761     }
3762     if (new_local_nnz) {
3763       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3764       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3765       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3766       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3767       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3768       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3769     } else {
3770       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3771     }
3772     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3773   } else {
3774     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3775   }
3776 
3777   /* set values */
3778   ptr_vals = recv_buffer_vals;
3779   ptr_idxs = recv_buffer_idxs_local;
3780   for (i=0;i<n_recvs;i++) {
3781     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3782       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3783       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3784       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3785       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3786       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3787     } else {
3788       /* TODO */
3789     }
3790     ptr_idxs += olengths_idxs[i];
3791     ptr_vals += olengths_vals[i];
3792   }
3793   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3794   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3795   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3796   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3797   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3798   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3799 
3800 #if 0
3801   if (!restrict_comm) { /* check */
3802     Vec       lvec,rvec;
3803     PetscReal infty_error;
3804 
3805     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3806     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3807     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3808     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3809     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3810     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3811     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3812     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3813     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3814   }
3815 #endif
3816 
3817   /* assemble new additional is (if any) */
3818   if (nis) {
3819     PetscInt **temp_idxs,*count_is,j,psum;
3820 
3821     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3822     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3823     ptr_idxs = recv_buffer_idxs_is;
3824     psum = 0;
3825     for (i=0;i<n_recvs;i++) {
3826       for (j=0;j<nis;j++) {
3827         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3828         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3829         psum += plen;
3830         ptr_idxs += plen+1; /* shift pointer to received data */
3831       }
3832     }
3833     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3834     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3835     for (i=1;i<nis;i++) {
3836       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3837     }
3838     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3839     ptr_idxs = recv_buffer_idxs_is;
3840     for (i=0;i<n_recvs;i++) {
3841       for (j=0;j<nis;j++) {
3842         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3843         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3844         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3845         ptr_idxs += plen+1; /* shift pointer to received data */
3846       }
3847     }
3848     for (i=0;i<nis;i++) {
3849       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3850       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3851       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3852     }
3853     ierr = PetscFree(count_is);CHKERRQ(ierr);
3854     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3855     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3856   }
3857   /* free workspace */
3858   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3859   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3860   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3861   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3862   if (isdense) {
3863     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3864     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3865   } else {
3866     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3867   }
3868   if (nis) {
3869     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3870     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3871   }
3872   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3873   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3874   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3875   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3876   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3877   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3878   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3879   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3880   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3881   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3882   ierr = PetscFree(onodes);CHKERRQ(ierr);
3883   if (nis) {
3884     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3885     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3886     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3887   }
3888   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3889   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3890     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3891     for (i=0;i<nis;i++) {
3892       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3893     }
3894   }
3895   PetscFunctionReturn(0);
3896 }
3897 
3898 /* temporary hack into ksp private data structure */
3899 #include <petsc/private/kspimpl.h>
3900 
3901 #undef __FUNCT__
3902 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3903 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3904 {
3905   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3906   PC_IS                  *pcis = (PC_IS*)pc->data;
3907   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3908   MatNullSpace           CoarseNullSpace=NULL;
3909   ISLocalToGlobalMapping coarse_islg;
3910   IS                     coarse_is,*isarray;
3911   PetscInt               i,im_active=-1,active_procs=-1;
3912   PetscInt               nis,nisdofs,nisneu;
3913   PC                     pc_temp;
3914   PCType                 coarse_pc_type;
3915   KSPType                coarse_ksp_type;
3916   PetscBool              multilevel_requested,multilevel_allowed;
3917   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3918   Mat                    t_coarse_mat_is;
3919   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3920   PetscMPIInt            all_procs;
3921   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3922   PetscBool              compute_vecs = PETSC_FALSE;
3923   PetscScalar            *array;
3924   PetscErrorCode         ierr;
3925 
3926   PetscFunctionBegin;
3927   /* Assign global numbering to coarse dofs */
3928   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3929     PetscInt ocoarse_size;
3930     compute_vecs = PETSC_TRUE;
3931     ocoarse_size = pcbddc->coarse_size;
3932     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3933     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3934     /* see if we can avoid some work */
3935     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3936       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3937         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3938         coarse_reuse = PETSC_FALSE;
3939       } else { /* we can safely reuse already computed coarse matrix */
3940         coarse_reuse = PETSC_TRUE;
3941       }
3942     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3943       coarse_reuse = PETSC_FALSE;
3944     }
3945     /* reset any subassembling information */
3946     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3947     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3948   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3949     coarse_reuse = PETSC_TRUE;
3950   }
3951 
3952   /* count "active" (i.e. with positive local size) and "void" processes */
3953   im_active = !!(pcis->n);
3954   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3955   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3956   void_procs = all_procs-active_procs;
3957   csin_type_simple = PETSC_TRUE;
3958   redist = PETSC_FALSE;
3959   if (pcbddc->current_level && void_procs) {
3960     csin_ml = PETSC_TRUE;
3961     ncoarse_ml = void_procs;
3962     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3963     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3964       csin_ds = PETSC_TRUE;
3965       ncoarse_ds = pcbddc->redistribute_coarse;
3966       redist = PETSC_TRUE;
3967     } else {
3968       csin_ds = PETSC_TRUE;
3969       ncoarse_ds = active_procs;
3970       redist = PETSC_TRUE;
3971     }
3972   } else {
3973     csin_ml = PETSC_FALSE;
3974     ncoarse_ml = all_procs;
3975     if (void_procs) {
3976       csin_ds = PETSC_TRUE;
3977       ncoarse_ds = void_procs;
3978       csin_type_simple = PETSC_FALSE;
3979     } else {
3980       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3981         csin_ds = PETSC_TRUE;
3982         ncoarse_ds = pcbddc->redistribute_coarse;
3983         redist = PETSC_TRUE;
3984       } else {
3985         csin_ds = PETSC_FALSE;
3986         ncoarse_ds = all_procs;
3987       }
3988     }
3989   }
3990 
3991   /*
3992     test if we can go multilevel: three conditions must be satisfied:
3993     - we have not exceeded the number of levels requested
3994     - we can actually subassemble the active processes
3995     - we can find a suitable number of MPI processes where we can place the subassembled problem
3996   */
3997   multilevel_allowed = PETSC_FALSE;
3998   multilevel_requested = PETSC_FALSE;
3999   if (pcbddc->current_level < pcbddc->max_levels) {
4000     multilevel_requested = PETSC_TRUE;
4001     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4002       multilevel_allowed = PETSC_FALSE;
4003     } else {
4004       multilevel_allowed = PETSC_TRUE;
4005     }
4006   }
4007   /* determine number of process partecipating to coarse solver */
4008   if (multilevel_allowed) {
4009     ncoarse = ncoarse_ml;
4010     csin = csin_ml;
4011     redist = PETSC_FALSE;
4012   } else {
4013     ncoarse = ncoarse_ds;
4014     csin = csin_ds;
4015   }
4016 
4017   /* creates temporary l2gmap and IS for coarse indexes */
4018   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4019   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4020 
4021   /* creates temporary MATIS object for coarse matrix */
4022   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4023   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4024   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4025   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4026 #if 0
4027   {
4028     PetscViewer viewer;
4029     char filename[256];
4030     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4031     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4032     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4033     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4034     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4035   }
4036 #endif
4037   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4038   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4039   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4040   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4041   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4042 
4043   /* compute dofs splitting and neumann boundaries for coarse dofs */
4044   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4045     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4046     const PetscInt         *idxs;
4047     ISLocalToGlobalMapping tmap;
4048 
4049     /* create map between primal indices (in local representative ordering) and local primal numbering */
4050     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4051     /* allocate space for temporary storage */
4052     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4053     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4054     /* allocate for IS array */
4055     nisdofs = pcbddc->n_ISForDofsLocal;
4056     nisneu = !!pcbddc->NeumannBoundariesLocal;
4057     nis = nisdofs + nisneu;
4058     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4059     /* dofs splitting */
4060     for (i=0;i<nisdofs;i++) {
4061       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4062       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4063       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4064       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4065       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4066       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4067       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4068       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4069     }
4070     /* neumann boundaries */
4071     if (pcbddc->NeumannBoundariesLocal) {
4072       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4073       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4074       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4075       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4076       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4077       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4078       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4079       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4080     }
4081     /* free memory */
4082     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4083     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4084     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4085   } else {
4086     nis = 0;
4087     nisdofs = 0;
4088     nisneu = 0;
4089     isarray = NULL;
4090   }
4091   /* destroy no longer needed map */
4092   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4093 
4094   /* restrict on coarse candidates (if needed) */
4095   coarse_mat_is = NULL;
4096   if (csin) {
4097     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4098       if (redist) {
4099         PetscMPIInt rank;
4100         PetscInt    spc,n_spc_p1,dest[1],destsize;
4101 
4102         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4103         spc = active_procs/ncoarse;
4104         n_spc_p1 = active_procs%ncoarse;
4105         if (im_active) {
4106           destsize = 1;
4107           if (rank > n_spc_p1*(spc+1)-1) {
4108             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4109           } else {
4110             dest[0] = rank/(spc+1);
4111           }
4112         } else {
4113           destsize = 0;
4114         }
4115         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4116       } else if (csin_type_simple) {
4117         PetscMPIInt rank;
4118         PetscInt    issize,isidx;
4119 
4120         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4121         if (im_active) {
4122           issize = 1;
4123           isidx = (PetscInt)rank;
4124         } else {
4125           issize = 0;
4126           isidx = -1;
4127         }
4128         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4129       } else { /* get a suitable subassembling pattern from MATIS code */
4130         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4131       }
4132 
4133       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4134       if (!redist || ncoarse <= void_procs) {
4135         PetscInt ncoarse_cand,tissize,*nisindices;
4136         PetscInt *coarse_candidates;
4137         const PetscInt* tisindices;
4138 
4139         /* get coarse candidates' ranks in pc communicator */
4140         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4141         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4142         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4143           if (!coarse_candidates[i]) {
4144             coarse_candidates[ncoarse_cand++]=i;
4145           }
4146         }
4147         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4148 
4149 
4150         if (pcbddc->dbg_flag) {
4151           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4152           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4153           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4154           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4155           for (i=0;i<ncoarse_cand;i++) {
4156             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4157           }
4158           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4159           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4160         }
4161         /* shift the pattern on coarse candidates */
4162         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4163         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4164         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4165         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4166         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4167         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4168         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4169       }
4170       if (pcbddc->dbg_flag) {
4171         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4172         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4173         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4174         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4175       }
4176     }
4177     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4178     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4179   } else {
4180     if (pcbddc->dbg_flag) {
4181       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4182       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4183       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4184     }
4185     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4186     coarse_mat_is = t_coarse_mat_is;
4187   }
4188 
4189   /* create local to global scatters for coarse problem */
4190   if (compute_vecs) {
4191     PetscInt lrows;
4192     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4193     if (coarse_mat_is) {
4194       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4195     } else {
4196       lrows = 0;
4197     }
4198     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4199     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4200     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4201     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4202     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4203   }
4204   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4205   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4206 
4207   /* set defaults for coarse KSP and PC */
4208   if (multilevel_allowed) {
4209     coarse_ksp_type = KSPRICHARDSON;
4210     coarse_pc_type = PCBDDC;
4211   } else {
4212     coarse_ksp_type = KSPPREONLY;
4213     coarse_pc_type = PCREDUNDANT;
4214   }
4215 
4216   /* print some info if requested */
4217   if (pcbddc->dbg_flag) {
4218     if (!multilevel_allowed) {
4219       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4220       if (multilevel_requested) {
4221         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4222       } else if (pcbddc->max_levels) {
4223         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4224       }
4225       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4226     }
4227   }
4228 
4229   /* create the coarse KSP object only once with defaults */
4230   if (coarse_mat_is) {
4231     MatReuse coarse_mat_reuse;
4232     PetscViewer dbg_viewer = NULL;
4233     if (pcbddc->dbg_flag) {
4234       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4235       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4236     }
4237     if (!pcbddc->coarse_ksp) {
4238       char prefix[256],str_level[16];
4239       size_t len;
4240       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4241       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4242       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4243       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4244       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4245       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4246       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4247       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4248       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4249       /* prefix */
4250       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4251       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4252       if (!pcbddc->current_level) {
4253         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4254         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4255       } else {
4256         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4257         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4258         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4259         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4260         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4261         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4262       }
4263       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4264       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4265       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4266       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4267       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4268       /* allow user customization */
4269       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4270     }
4271     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4272     if (nisdofs) {
4273       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4274       for (i=0;i<nisdofs;i++) {
4275         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4276       }
4277     }
4278     if (nisneu) {
4279       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4280       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4281     }
4282 
4283     /* get some info after set from options */
4284     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4285     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4286     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4287     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4288     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4289       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4290       isbddc = PETSC_FALSE;
4291     }
4292     if (isredundant) {
4293       KSP inner_ksp;
4294       PC inner_pc;
4295       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4296       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4297       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4298     }
4299 
4300     /* assemble coarse matrix */
4301     if (coarse_reuse) {
4302       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4303       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4304       coarse_mat_reuse = MAT_REUSE_MATRIX;
4305     } else {
4306       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4307     }
4308     if (isbddc || isnn) {
4309       if (pcbddc->coarsening_ratio > 1) {
4310         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4311           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4312           if (pcbddc->dbg_flag) {
4313             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4314             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4315             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4316             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4317           }
4318         }
4319         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4320       } else {
4321         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4322         coarse_mat = coarse_mat_is;
4323       }
4324     } else {
4325       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4326     }
4327     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4328 
4329     /* propagate symmetry info to coarse matrix */
4330     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4331     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4332 
4333     /* set operators */
4334     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4335     if (pcbddc->dbg_flag) {
4336       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4337     }
4338   } else { /* processes non partecipating to coarse solver (if any) */
4339     coarse_mat = 0;
4340   }
4341   ierr = PetscFree(isarray);CHKERRQ(ierr);
4342 #if 0
4343   {
4344     PetscViewer viewer;
4345     char filename[256];
4346     sprintf(filename,"coarse_mat.m");
4347     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4348     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4349     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4350     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4351   }
4352 #endif
4353 
4354   /* Compute coarse null space (special handling by BDDC only) */
4355   if (pcbddc->NullSpace) {
4356     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4357   }
4358 
4359   if (pcbddc->coarse_ksp) {
4360     Vec crhs,csol;
4361     PetscBool ispreonly;
4362     if (CoarseNullSpace) {
4363       if (isbddc) {
4364         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4365       } else {
4366         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4367       }
4368     }
4369     /* setup coarse ksp */
4370     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4371     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4372     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4373     /* hack */
4374     if (!csol) {
4375       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4376     }
4377     if (!crhs) {
4378       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4379     }
4380     /* Check coarse problem if in debug mode or if solving with an iterative method */
4381     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4382     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4383       KSP       check_ksp;
4384       KSPType   check_ksp_type;
4385       PC        check_pc;
4386       Vec       check_vec,coarse_vec;
4387       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4388       PetscInt  its;
4389       PetscBool compute_eigs;
4390       PetscReal *eigs_r,*eigs_c;
4391       PetscInt  neigs;
4392       const char *prefix;
4393 
4394       /* Create ksp object suitable for estimation of extreme eigenvalues */
4395       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4396       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4397       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4398       if (ispreonly) {
4399         check_ksp_type = KSPPREONLY;
4400         compute_eigs = PETSC_FALSE;
4401       } else {
4402         check_ksp_type = KSPGMRES;
4403         compute_eigs = PETSC_TRUE;
4404       }
4405       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4406       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4407       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4408       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4409       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4410       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4411       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4412       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4413       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4414       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4415       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4416       /* create random vec */
4417       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4418       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4419       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4420       if (CoarseNullSpace) {
4421         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4422       }
4423       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4424       /* solve coarse problem */
4425       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4426       if (CoarseNullSpace) {
4427         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4428       }
4429       /* set eigenvalue estimation if preonly has not been requested */
4430       if (compute_eigs) {
4431         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4432         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4433         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4434         lambda_max = eigs_r[neigs-1];
4435         lambda_min = eigs_r[0];
4436         if (pcbddc->use_coarse_estimates) {
4437           if (lambda_max>lambda_min) {
4438             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4439             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4440           }
4441         }
4442       }
4443 
4444       /* check coarse problem residual error */
4445       if (pcbddc->dbg_flag) {
4446         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4447         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4448         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4449         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4450         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4451         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4452         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4453         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4454         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4455         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4456         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4457         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4458         if (compute_eigs) {
4459           PetscReal lambda_max_s,lambda_min_s;
4460           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4461           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4462           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4463           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4464           for (i=0;i<neigs;i++) {
4465             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4466           }
4467         }
4468         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4469         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4470       }
4471       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4472       if (compute_eigs) {
4473         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4474         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4475       }
4476     }
4477   }
4478   /* print additional info */
4479   if (pcbddc->dbg_flag) {
4480     /* waits until all processes reaches this point */
4481     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4482     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4483     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4484   }
4485 
4486   /* free memory */
4487   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4488   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4489   PetscFunctionReturn(0);
4490 }
4491 
4492 #undef __FUNCT__
4493 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4494 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4495 {
4496   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4497   PC_IS*         pcis = (PC_IS*)pc->data;
4498   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4499   PetscInt       i,local_size,coarse_size=0;
4500   PetscInt       *local_primal_indices=NULL;
4501   PetscInt       *t_local_primal_indices=NULL;
4502   PetscErrorCode ierr;
4503 
4504   PetscFunctionBegin;
4505   /* Compute global number of coarse dofs */
4506   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4507     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4508   }
4509   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr);
4510   ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr);
4511   local_size = 0;
4512   for (i=0;i<pcbddc->local_primal_size_cc;i++) {
4513     PetscInt j;
4514     for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j;
4515   }
4516   ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr);
4517   if (local_size != pcbddc->local_primal_size) {
4518     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4519   }
4520 
4521   /* check numbering */
4522   if (pcbddc->dbg_flag) {
4523     PetscScalar coarsesum,*array;
4524     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4525 
4526     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4527     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4528     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4529     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4530     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4531     for (i=0;i<pcbddc->local_primal_size;i++) {
4532       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4533     }
4534     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4535     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4536     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4537     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4538     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4539     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4540     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4541     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4542     for (i=0;i<pcis->n;i++) {
4543       if (array[i] == 1.0) {
4544         set_error = PETSC_TRUE;
4545         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4546       }
4547     }
4548     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4549     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4550     for (i=0;i<pcis->n;i++) {
4551       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4552     }
4553     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4554     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4555     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4556     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4557     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4558     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4559     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4560       PetscInt *gidxs;
4561 
4562       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4563       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4564       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4565       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4566       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4567       for (i=0;i<pcbddc->local_primal_size;i++) {
4568         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4569       }
4570       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4571       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4572     }
4573     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4574     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4575   }
4576   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4577   /* get back data */
4578   *coarse_size_n = coarse_size;
4579   *local_primal_indices_n = local_primal_indices;
4580   PetscFunctionReturn(0);
4581 }
4582 
4583 #undef __FUNCT__
4584 #define __FUNCT__ "PCBDDCGlobalToLocal"
4585 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4586 {
4587   IS             localis_t;
4588   PetscInt       i,lsize,*idxs,n;
4589   PetscScalar    *vals;
4590   PetscErrorCode ierr;
4591 
4592   PetscFunctionBegin;
4593   /* get indices in local ordering exploiting local to global map */
4594   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4595   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4596   for (i=0;i<lsize;i++) vals[i] = 1.0;
4597   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4598   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4599   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4600   if (idxs) { /* multilevel guard */
4601     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4602   }
4603   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4604   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4605   ierr = PetscFree(vals);CHKERRQ(ierr);
4606   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4607   /* now compute set in local ordering */
4608   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4609   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4610   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4611   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4612   for (i=0,lsize=0;i<n;i++) {
4613     if (PetscRealPart(vals[i]) > 0.5) {
4614       lsize++;
4615     }
4616   }
4617   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4618   for (i=0,lsize=0;i<n;i++) {
4619     if (PetscRealPart(vals[i]) > 0.5) {
4620       idxs[lsize++] = i;
4621     }
4622   }
4623   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4624   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4625   *localis = localis_t;
4626   PetscFunctionReturn(0);
4627 }
4628 
4629 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4630 #undef __FUNCT__
4631 #define __FUNCT__ "PCBDDCMatMult_Private"
4632 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4633 {
4634   PCBDDCChange_ctx change_ctx;
4635   PetscErrorCode   ierr;
4636 
4637   PetscFunctionBegin;
4638   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4639   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4640   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4641   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4642   PetscFunctionReturn(0);
4643 }
4644 
4645 #undef __FUNCT__
4646 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4647 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4648 {
4649   PCBDDCChange_ctx change_ctx;
4650   PetscErrorCode   ierr;
4651 
4652   PetscFunctionBegin;
4653   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4654   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4655   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4656   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4657   PetscFunctionReturn(0);
4658 }
4659 
4660 #undef __FUNCT__
4661 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4662 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4663 {
4664   PC_IS               *pcis=(PC_IS*)pc->data;
4665   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4666   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4667   Mat                 S_j;
4668   PetscInt            *used_xadj,*used_adjncy;
4669   PetscBool           free_used_adj;
4670   PetscErrorCode      ierr;
4671 
4672   PetscFunctionBegin;
4673   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4674   free_used_adj = PETSC_FALSE;
4675   if (pcbddc->sub_schurs_layers == -1) {
4676     used_xadj = NULL;
4677     used_adjncy = NULL;
4678   } else {
4679     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4680       used_xadj = pcbddc->mat_graph->xadj;
4681       used_adjncy = pcbddc->mat_graph->adjncy;
4682     } else if (pcbddc->computed_rowadj) {
4683       used_xadj = pcbddc->mat_graph->xadj;
4684       used_adjncy = pcbddc->mat_graph->adjncy;
4685     } else {
4686       PetscBool      flg_row=PETSC_FALSE;
4687       const PetscInt *xadj,*adjncy;
4688       PetscInt       nvtxs;
4689 
4690       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4691       if (flg_row) {
4692         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4693         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4694         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4695         free_used_adj = PETSC_TRUE;
4696       } else {
4697         pcbddc->sub_schurs_layers = -1;
4698         used_xadj = NULL;
4699         used_adjncy = NULL;
4700       }
4701       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4702     }
4703   }
4704 
4705   /* setup sub_schurs data */
4706   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4707   if (!sub_schurs->use_mumps) {
4708     /* pcbddc->ksp_D up to date only if not using MUMPS */
4709     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4710     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4711   } else {
4712     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4713     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4714   }
4715   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4716 
4717   /* free adjacency */
4718   if (free_used_adj) {
4719     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4720   }
4721   PetscFunctionReturn(0);
4722 }
4723 
4724 #undef __FUNCT__
4725 #define __FUNCT__ "PCBDDCInitSubSchurs"
4726 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4727 {
4728   PC_IS               *pcis=(PC_IS*)pc->data;
4729   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4730   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4731   PCBDDCGraph         graph;
4732   PetscErrorCode      ierr;
4733 
4734   PetscFunctionBegin;
4735   /* attach interface graph for determining subsets */
4736   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4737     IS verticesIS;
4738 
4739     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4740     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4741     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4742     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4743     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4744     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4745 /*
4746     if (pcbddc->dbg_flag) {
4747       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4748     }
4749 */
4750   } else {
4751     graph = pcbddc->mat_graph;
4752   }
4753 
4754   /* sub_schurs init */
4755   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4756 
4757   /* free graph struct */
4758   if (pcbddc->sub_schurs_rebuild) {
4759     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4760   }
4761   PetscFunctionReturn(0);
4762 }
4763