xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 6816873a55b70635a36c8ab78c96f47555ca6ab6)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size;
76       cum2 += subset_size*n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
112     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125                       nv+cum,&pcbddc->adaptive_constraints_idxs,
126                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
127   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129   maxneigs = 0;
130   cum = cum2 = cumarray = 0;
131   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132   pcbddc->adaptive_constraints_data_ptr[0] = 0;
133   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
134     const PetscInt *idxs;
135 
136     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
137     for (cum=0;cum<nv;cum++) {
138       pcbddc->adaptive_constraints_n[cum] = 1;
139       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140       pcbddc->adaptive_constraints_data[cum] = 1.0;
141       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143     }
144     cum2 = cum;
145     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
146   }
147 
148   if (mss) { /* multilevel */
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
150     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
151   }
152 
153   for (i=0;i<sub_schurs->n_subs;i++) {
154     PetscInt j,subset_size;
155 
156     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
157     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
158       const PetscInt *idxs;
159       PetscReal      infty = PETSC_MAX_REAL;
160       PetscInt       eigs_start = 0;
161       PetscBLASInt   B_N;
162 
163       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
164       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
165         if (sub_schurs->is_hermitian) {
166           PetscInt j;
167           for (j=0;j<subset_size;j++) {
168             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
169           }
170           for (j=0;j<subset_size;j++) {
171             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
172           }
173         } else {
174           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
175           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
176         }
177       } else {
178         S = Sarray + cumarray;
179         St = Starray + cumarray;
180       }
181 
182       /* Threshold: this is an heuristic for edges */
183       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
184       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
185 
186       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
187         PetscBLASInt B_itype = 1;
188         PetscBLASInt B_IL, B_IU;
189         PetscReal    eps = -1.0; /* dlamch? */
190         PetscInt     nmin_s;
191 
192         /* ask for eigenvalues larger than thresh */
193         if (pcbddc->dbg_flag) {
194           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
195         }
196         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
197 #if defined(PETSC_USE_COMPLEX)
198         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
199 #else
200         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
201 #endif
202         ierr = PetscFPTrapPop();CHKERRQ(ierr);
203         if (B_ierr) {
204           if (B_ierr < 0 ) {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
206           } else if (B_ierr <= B_N) {
207             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
208           } else {
209             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
210           }
211         }
212 
213         if (B_neigs > nmax) {
214           if (pcbddc->dbg_flag) {
215             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
216           }
217           eigs_start = B_neigs -nmax;
218           B_neigs = nmax;
219         }
220 
221         nmin_s = PetscMin(nmin,B_N);
222         if (B_neigs < nmin_s) {
223           PetscBLASInt B_neigs2;
224 
225           B_IU = B_N - B_neigs;
226           B_IL = B_N - nmin_s + 1;
227           if (pcbddc->dbg_flag) {
228             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
229           }
230           if (sub_schurs->is_hermitian) {
231             PetscInt j;
232             for (j=0;j<subset_size;j++) {
233               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
234             }
235             for (j=0;j<subset_size;j++) {
236               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
237             }
238           } else {
239             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
240             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
241           }
242           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
243 #if defined(PETSC_USE_COMPLEX)
244           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
245 #else
246           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
247 #endif
248           ierr = PetscFPTrapPop();CHKERRQ(ierr);
249           B_neigs += B_neigs2;
250         }
251         if (B_ierr) {
252           if (B_ierr < 0 ) {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
254           } else if (B_ierr <= B_N) {
255             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
256           } else {
257             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
258           }
259         }
260         if (pcbddc->dbg_flag) {
261           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
262           for (j=0;j<B_neigs;j++) {
263             if (eigs[j] == 0.0) {
264               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
265             } else {
266               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
267             }
268           }
269         }
270       } else {
271           /* TODO */
272       }
273 
274       maxneigs = PetscMax(B_neigs,maxneigs);
275       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
276       if (B_neigs) {
277         ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
278 
279         if (pcbddc->dbg_flag > 1) {
280           PetscInt ii;
281           for (ii=0;ii<B_neigs;ii++) {
282             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
283             for (j=0;j<B_N;j++) {
284               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
285             }
286           }
287         }
288 #if 0
289         for (j=0;j<B_neigs;j++) {
290           PetscBLASInt Blas_N,Blas_one = 1.0;
291           PetscScalar norm;
292           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
293           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
294                                                      &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
295           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
296             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
297           } else {
298             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
299           }
300           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
301         }
302 #endif
303         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
304         pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
305         pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
306         cum++;
307       }
308       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
309     }
310     /* shift for next computation */
311     cumarray += subset_size*subset_size;
312   }
313   if (pcbddc->dbg_flag) {
314     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
315   }
316 
317   if (mss) {
318     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
319     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
320     /* destroy matrices (junk) */
321     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
322     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
323   }
324   if (allocated_S_St) {
325     ierr = PetscFree2(S,St);CHKERRQ(ierr);
326   }
327   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
328 #if defined(PETSC_USE_COMPLEX)
329   ierr = PetscFree(rwork);CHKERRQ(ierr);
330 #endif
331   if (pcbddc->dbg_flag) {
332     PetscInt maxneigs_r;
333     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
334     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
335   }
336   PetscFunctionReturn(0);
337 }
338 
339 #undef __FUNCT__
340 #define __FUNCT__ "PCBDDCSetUpSolvers"
341 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
342 {
343   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
344   PetscScalar    *coarse_submat_vals;
345   PetscErrorCode ierr;
346 
347   PetscFunctionBegin;
348   /* Setup local scatters R_to_B and (optionally) R_to_D */
349   /* PCBDDCSetUpLocalWorkVectors should be called first! */
350   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
351 
352   /* Setup local neumann solver ksp_R */
353   /* PCBDDCSetUpLocalScatters should be called first! */
354   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
355 
356   /* Change global null space passed in by the user if change of basis has been requested */
357   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
358     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
359   }
360 
361   /*
362      Setup local correction and local part of coarse basis.
363      Gives back the dense local part of the coarse matrix in column major ordering
364   */
365   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
366 
367   /* Compute total number of coarse nodes and setup coarse solver */
368   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
369 
370   /* free */
371   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "PCBDDCResetCustomization"
377 PetscErrorCode PCBDDCResetCustomization(PC pc)
378 {
379   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
384   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
385   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
387   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
388   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
389   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
390   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
391   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
392   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
393   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 #undef __FUNCT__
398 #define __FUNCT__ "PCBDDCResetTopography"
399 PetscErrorCode PCBDDCResetTopography(PC pc)
400 {
401   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
402   PetscErrorCode ierr;
403 
404   PetscFunctionBegin;
405   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
406   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
407   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
408   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
409   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "PCBDDCResetSolvers"
415 PetscErrorCode PCBDDCResetSolvers(PC pc)
416 {
417   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
418   PetscScalar    *array;
419   PetscErrorCode ierr;
420 
421   PetscFunctionBegin;
422   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
423   if (pcbddc->coarse_phi_B) {
424     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
425     ierr = PetscFree(array);CHKERRQ(ierr);
426   }
427   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
428   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
429   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
430   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
431   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
432   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
434   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
436   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
437   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
440   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
443   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
444   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
446   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
447   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
448   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
449   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
455 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
456 {
457   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
458   PC_IS          *pcis = (PC_IS*)pc->data;
459   VecType        impVecType;
460   PetscInt       n_constraints,n_R,old_size;
461   PetscErrorCode ierr;
462 
463   PetscFunctionBegin;
464   if (!pcbddc->ConstraintMatrix) {
465     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
466   }
467   /* get sizes */
468   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
469   n_R = pcis->n-pcbddc->n_vertices;
470   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
471   /* local work vectors (try to avoid unneeded work)*/
472   /* R nodes */
473   old_size = -1;
474   if (pcbddc->vec1_R) {
475     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
476   }
477   if (n_R != old_size) {
478     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
480     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
481     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
482     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
483     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
484   }
485   /* local primal dofs */
486   old_size = -1;
487   if (pcbddc->vec1_P) {
488     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
489   }
490   if (pcbddc->local_primal_size != old_size) {
491     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
492     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
493     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
494     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
495   }
496   /* local explicit constraints */
497   old_size = -1;
498   if (pcbddc->vec1_C) {
499     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
500   }
501   if (n_constraints && n_constraints != old_size) {
502     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
503     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
504     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
505     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
506   }
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "PCBDDCSetUpCorrection"
512 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
513 {
514   PetscErrorCode  ierr;
515   /* pointers to pcis and pcbddc */
516   PC_IS*          pcis = (PC_IS*)pc->data;
517   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
518   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
519   /* submatrices of local problem */
520   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
521   /* submatrices of local coarse problem */
522   Mat             S_VV,S_CV,S_VC,S_CC;
523   /* working matrices */
524   Mat             C_CR;
525   /* additional working stuff */
526   PC              pc_R;
527   Mat             F;
528   PetscBool       isLU,isCHOL,isILU;
529 
530   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
531   PetscScalar     *work;
532   PetscInt        *idx_V_B;
533   PetscInt        n,n_vertices,n_constraints;
534   PetscInt        i,n_R,n_D,n_B;
535   PetscBool       unsymmetric_check;
536   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
537   MatType         impMatType;
538   /* some shortcuts to scalars */
539   PetscScalar     one=1.0,m_one=-1.0;
540 
541   PetscFunctionBegin;
542   n_vertices = pcbddc->n_vertices;
543   n_constraints = pcbddc->local_primal_size-n_vertices;
544   /* Set Non-overlapping dimensions */
545   n_B = pcis->n_B;
546   n_D = pcis->n - n_B;
547   n_R = pcis->n - n_vertices;
548 
549   /* Set types for local objects needed by BDDC precondtioner */
550   impMatType = MATSEQDENSE;
551 
552   /* vertices in boundary numbering */
553   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
554   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
555   if (i != n_vertices) {
556     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
557   }
558 
559   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
560   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
567   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
568   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
569 
570   unsymmetric_check = PETSC_FALSE;
571   /* allocate workspace */
572   n = 0;
573   if (n_constraints) {
574     n += n_R*n_constraints;
575   }
576   if (n_vertices) {
577     n = PetscMax(2*n_R*n_vertices,n);
578     n = PetscMax((n_R+n_B)*n_vertices,n);
579   }
580   if (!pcbddc->issym) {
581     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
582     unsymmetric_check = PETSC_TRUE;
583   }
584   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
585 
586   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
587   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
588   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
589   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
590   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
591   if (isLU || isILU || isCHOL) {
592     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
593   } else if (sub_schurs->reuse_mumps) {
594     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
595     MatFactorType type;
596 
597     F = reuse_mumps->F;
598     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
599     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
600   } else {
601     F = NULL;
602   }
603 
604   /* Precompute stuffs needed for preprocessing and application of BDDC*/
605   if (n_constraints) {
606     Mat         M1,M2,M3;
607     Mat         auxmat;
608     IS          is_aux;
609     PetscScalar *array,*array2;
610 
611     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
612     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
613 
614     /* Extract constraints on R nodes: C_{CR}  */
615     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
616     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
617     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
618 
619     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
620     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
621     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
622     for (i=0;i<n_constraints;i++) {
623       const PetscScalar *row_cmat_values;
624       const PetscInt    *row_cmat_indices;
625       PetscInt          size_of_constraint,j;
626 
627       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
628       for (j=0;j<size_of_constraint;j++) {
629         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
630       }
631       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
632     }
633     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
634     if (F) {
635       Mat B;
636 
637       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
638       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
639       ierr = MatDestroy(&B);CHKERRQ(ierr);
640     } else {
641       PetscScalar *marr;
642 
643       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
644       for (i=0;i<n_constraints;i++) {
645         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
646         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
647         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
648         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
649         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
650       }
651       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
652     }
653     if (!pcbddc->switch_static) {
654       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
655       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
656       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
657       for (i=0;i<n_constraints;i++) {
658         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
659         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
660         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
661         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
662         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
663         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
664       }
665       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
666       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
667       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
668     } else {
669       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
670       pcbddc->local_auxmat2 = local_auxmat2_R;
671       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
672     }
673     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
674     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
675     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
676     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
677     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
678     if (isCHOL) {
679       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
680     } else {
681       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
682     }
683     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
684     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
685     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
686     ierr = MatDestroy(&M2);CHKERRQ(ierr);
687     ierr = MatDestroy(&M3);CHKERRQ(ierr);
688     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
689     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
690     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
691     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
692     ierr = MatDestroy(&M1);CHKERRQ(ierr);
693   }
694   /* Get submatrices from subdomain matrix */
695   if (n_vertices) {
696     Mat       newmat;
697     IS        is_aux;
698     PetscInt  ibs,mbs;
699     PetscBool issbaij;
700 
701     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
702       IS tis;
703 
704       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
705       ierr = ISSort(tis);CHKERRQ(ierr);
706       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
707       ierr = ISDestroy(&tis);CHKERRQ(ierr);
708     } else {
709       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
710     }
711     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
712     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
713     if (ibs != mbs) { /* need to convert to SEQAIJ */
714       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
715       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
716       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
717       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
718       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
719       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
720     } else {
721       /* this is safe */
722       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
723       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
724       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
725         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
726         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
727         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
728         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
729         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
730         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
731       } else {
732         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
733         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
734         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
735       }
736       /* need to convert to SeqAIJ for MatMatMult with dense matrix */
737       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
738     }
739     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
740   }
741 
742   /* Matrix of coarse basis functions (local) */
743   if (pcbddc->coarse_phi_B) {
744     PetscInt on_B,on_primal,on_D=n_D;
745     if (pcbddc->coarse_phi_D) {
746       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
747     }
748     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
749     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
750       PetscScalar *marray;
751 
752       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
753       ierr = PetscFree(marray);CHKERRQ(ierr);
754       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
755       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
756       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
757       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
758     }
759   }
760 
761   if (!pcbddc->coarse_phi_B) {
762     PetscScalar *marray;
763 
764     n = n_B*pcbddc->local_primal_size;
765     if (pcbddc->switch_static || pcbddc->dbg_flag) {
766       n += n_D*pcbddc->local_primal_size;
767     }
768     if (!pcbddc->issym) {
769       n *= 2;
770     }
771     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
772     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
773     n = n_B*pcbddc->local_primal_size;
774     if (pcbddc->switch_static || pcbddc->dbg_flag) {
775       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
776       n += n_D*pcbddc->local_primal_size;
777     }
778     if (!pcbddc->issym) {
779       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
780       if (pcbddc->switch_static || pcbddc->dbg_flag) {
781         n = n_B*pcbddc->local_primal_size;
782         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
783       }
784     } else {
785       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
786       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
787       if (pcbddc->switch_static || pcbddc->dbg_flag) {
788         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
789         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
790       }
791     }
792   }
793   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
794   /* vertices */
795   if (n_vertices) {
796 
797     if (n_R) {
798       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
799       PetscBLASInt B_N,B_one = 1;
800       PetscScalar  *x,*y;
801 
802       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
803       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
804       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
805       if (F) { /* TODO could be optimized for symmetric problems */
806         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
807       } else {
808         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
809         for (i=0;i<n_vertices;i++) {
810           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
811           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
812           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
813           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
814           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
815         }
816         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
817       }
818       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
819       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
820       if (n_constraints) {
821         Mat B;
822 
823         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
824         for (i=0;i<n_vertices;i++) {
825           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
826           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
827           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
830           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
831         }
832         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
833         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
834         ierr = MatDestroy(&B);CHKERRQ(ierr);
835         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
836         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
837         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
838         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
839         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
840         ierr = MatDestroy(&B);CHKERRQ(ierr);
841       }
842       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
843       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
844       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
845       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
846       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
847       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
848       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
849       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
850       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
851       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
852     } else {
853       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
854     }
855     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
856     /* coarse basis functions */
857     for (i=0;i<n_vertices;i++) {
858       PetscScalar *y;
859 
860       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
861       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
862       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
863       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
864       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
865       y[n_B*i+idx_V_B[i]] = 1.0;
866       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
867       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
868 
869       if (pcbddc->switch_static || pcbddc->dbg_flag) {
870         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
871         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
872         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
875         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
876       }
877       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
878     }
879   }
880 
881   if (n_constraints) {
882     Mat B;
883 
884     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
885     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
886     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
887     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
888     if (n_vertices) {
889       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
890         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
891       } else {
892         Mat S_VCt;
893 
894         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
895         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
896         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
897       }
898     }
899     ierr = MatDestroy(&B);CHKERRQ(ierr);
900     /* coarse basis functions */
901     for (i=0;i<n_constraints;i++) {
902       PetscScalar *y;
903 
904       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
905       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
906       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
907       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
908       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
910       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
911       if (pcbddc->switch_static || pcbddc->dbg_flag) {
912         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
913         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
914         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
917         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
918       }
919       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
920     }
921   }
922   if (n_constraints) {
923     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
924   }
925 
926   /* compute other basis functions for non-symmetric problems */
927   if (!pcbddc->issym) {
928 
929     if (n_constraints) {
930       Mat S_CCT,B_C;
931 
932       /* this is a lazy thing */
933       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
934       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
935       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
936       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
937       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
938       if (n_vertices) {
939         Mat B_V,S_VCT;
940 
941         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
942         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
943         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
944         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
945         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
946       }
947       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
948     } else { /* if there are no constraints, reset work */
949       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
950     }
951     if (n_vertices && n_R) {
952       Mat          A_VRT;
953       PetscScalar  *marray;
954       PetscBLASInt B_N,B_one = 1;
955 
956       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
957       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
958       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
959       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
960       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
961       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
962       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
963     }
964 
965     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
966       for (i=0;i<pcbddc->local_primal_size;i++) {
967         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
968         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
969         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
970         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
971         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
972       }
973     } else {
974       for (i=0;i<pcbddc->local_primal_size;i++) {
975         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
976         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
977         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
978         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
979         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
980       }
981     }
982     /* coarse basis functions */
983     for (i=0;i<pcbddc->local_primal_size;i++) {
984       PetscScalar *y;
985 
986       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
987       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
988       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
989       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991       if (i<n_vertices) {
992         y[n_B*i+idx_V_B[i]] = 1.0;
993       }
994       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
995       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
996 
997       if (pcbddc->switch_static || pcbddc->dbg_flag) {
998         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
999         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1000         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1003         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1004       }
1005       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1006     }
1007   }
1008   /* free memory */
1009   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1010   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1011   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1012   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1013   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1014   ierr = PetscFree(work);CHKERRQ(ierr);
1015   if (n_vertices) {
1016     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1017   }
1018   if (n_constraints) {
1019     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1020   }
1021   /* Checking coarse_sub_mat and coarse basis functios */
1022   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1023   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1024   if (pcbddc->dbg_flag) {
1025     Mat         coarse_sub_mat;
1026     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1027     Mat         coarse_phi_D,coarse_phi_B;
1028     Mat         coarse_psi_D,coarse_psi_B;
1029     Mat         A_II,A_BB,A_IB,A_BI;
1030     Mat         C_B,CPHI;
1031     IS          is_dummy;
1032     Vec         mones;
1033     MatType     checkmattype=MATSEQAIJ;
1034     PetscReal   real_value;
1035 
1036     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1037     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1038     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1039     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1040     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1041     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1042     if (unsymmetric_check) {
1043       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1044       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1045     }
1046     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1047 
1048     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1049     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
1050     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1051     if (unsymmetric_check) {
1052       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1053       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1054       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1055       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1056       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1057       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1058       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1059       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1060       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1061       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1062       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1063       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1064     } else {
1065       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1066       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1067       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1068       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1069       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1070       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1071       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1072       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1073     }
1074     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1075     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1076     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1077     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1078     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1079     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1080     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1081     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1082 
1083     /* check constraints */
1084     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1085     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1086     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1087     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1088     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1089     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1090     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1091     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1092     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1093     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1094     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1095     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1096 
1097     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1098     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1099     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1100     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1101     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1102     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1103     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1104     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1105     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1106     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1107     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1108     if (unsymmetric_check) {
1109       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1110       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1111     }
1112     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1113   }
1114   /* get back data */
1115   *coarse_submat_vals_n = coarse_submat_vals;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1121 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1122 {
1123   Mat            *work_mat;
1124   IS             isrow_s,iscol_s;
1125   PetscBool      rsorted,csorted;
1126   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1131   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1132   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1133   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1134 
1135   if (!rsorted) {
1136     const PetscInt *idxs;
1137     PetscInt *idxs_sorted,i;
1138 
1139     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1140     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1141     for (i=0;i<rsize;i++) {
1142       idxs_perm_r[i] = i;
1143     }
1144     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1145     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1146     for (i=0;i<rsize;i++) {
1147       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1148     }
1149     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1150     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1151   } else {
1152     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1153     isrow_s = isrow;
1154   }
1155 
1156   if (!csorted) {
1157     if (isrow == iscol) {
1158       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1159       iscol_s = isrow_s;
1160     } else {
1161       const PetscInt *idxs;
1162       PetscInt *idxs_sorted,i;
1163 
1164       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1165       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1166       for (i=0;i<csize;i++) {
1167         idxs_perm_c[i] = i;
1168       }
1169       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1170       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1171       for (i=0;i<csize;i++) {
1172         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1173       }
1174       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1175       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1176     }
1177   } else {
1178     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1179     iscol_s = iscol;
1180   }
1181 
1182   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1183 
1184   if (!rsorted || !csorted) {
1185     Mat      new_mat;
1186     IS       is_perm_r,is_perm_c;
1187 
1188     if (!rsorted) {
1189       PetscInt *idxs_r,i;
1190       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1191       for (i=0;i<rsize;i++) {
1192         idxs_r[idxs_perm_r[i]] = i;
1193       }
1194       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1195       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1196     } else {
1197       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1198     }
1199     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1200 
1201     if (!csorted) {
1202       if (isrow_s == iscol_s) {
1203         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1204         is_perm_c = is_perm_r;
1205       } else {
1206         PetscInt *idxs_c,i;
1207         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1208         for (i=0;i<csize;i++) {
1209           idxs_c[idxs_perm_c[i]] = i;
1210         }
1211         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1212         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1213       }
1214     } else {
1215       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1216     }
1217     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1218 
1219     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1220     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1221     work_mat[0] = new_mat;
1222     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1223     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1224   }
1225 
1226   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1227   *B = work_mat[0];
1228   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1229   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1230   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1236 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1237 {
1238   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1239   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1240   Mat            new_mat;
1241   IS             is_local,is_global;
1242   PetscInt       local_size;
1243   PetscBool      isseqaij;
1244   PetscErrorCode ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1248   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1249   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1250   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1251   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1252   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1253   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1254 
1255   /* check */
1256   if (pcbddc->dbg_flag) {
1257     Vec       x,x_change;
1258     PetscReal error;
1259 
1260     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1261     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1262     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1263     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1266     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1267     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1268     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1269     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1270     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1271     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1272     ierr = VecDestroy(&x);CHKERRQ(ierr);
1273     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1274   }
1275 
1276   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1277   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1278   if (isseqaij) {
1279     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1280   } else {
1281     Mat work_mat;
1282     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1283     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1284     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1285   }
1286   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1287 #if !defined(PETSC_USE_COMPLEX)
1288   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1289 #endif
1290   /*
1291   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1292   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1293   */
1294   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #undef __FUNCT__
1299 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1300 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1301 {
1302   PC_IS*          pcis = (PC_IS*)(pc->data);
1303   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1304   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1305   IS              is_B_reuse_mumps=NULL;
1306   PetscInt        *idx_R_local;
1307   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1308   PetscInt        vbs,bs;
1309   PetscBT         bitmask=NULL;
1310   PetscErrorCode  ierr;
1311 
1312   PetscFunctionBegin;
1313   /*
1314     No need to setup local scatters if
1315       - primal space is unchanged
1316         AND
1317       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1318         AND
1319       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1320   */
1321   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1322     PetscFunctionReturn(0);
1323   }
1324   /* destroy old objects */
1325   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1326   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1327   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1328   /* Set Non-overlapping dimensions */
1329   n_B = pcis->n_B;
1330   n_D = pcis->n - n_B;
1331   n_vertices = pcbddc->n_vertices;
1332 
1333   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1334   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1335 
1336   /* create auxiliary bitmask */
1337   if (!sub_schurs->reuse_mumps) {
1338     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1339     for (i=0;i<n_vertices;i++) {
1340       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1341     }
1342 
1343     for (i=0, n_R=0; i<pcis->n; i++) {
1344       if (!PetscBTLookup(bitmask,i)) {
1345         idx_R_local[n_R++] = i;
1346       }
1347     }
1348   } else { /* A different ordering and auxiliary scatters are needed we are reusing MUMPS Schur solver */
1349     PCBDDCReuseMumps       reuse_mumps = sub_schurs->reuse_mumps;
1350     const PetscInt         *idxs;
1351     PetscInt               schur_size;
1352     IS                     list[2],tisB_N,tisR;
1353 
1354     if (sub_schurs->is_dir) {
1355       list[0] = sub_schurs->is_Ej_all;
1356       list[1] = sub_schurs->is_dir;
1357       ierr = ISGlobalToLocalMappingApplyIS(pcis->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_dir,&list[1]);CHKERRQ(ierr);
1358       ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&is_B_reuse_mumps);CHKERRQ(ierr);
1359       ierr = ISDestroy(&list[1]);CHKERRQ(ierr);
1360     } else {
1361       ierr = ISDuplicate(sub_schurs->is_Ej_all,&is_B_reuse_mumps);CHKERRQ(ierr);
1362     }
1363     ierr = ISGetLocalSize(is_B_reuse_mumps,&schur_size);CHKERRQ(ierr);
1364     if (reuse_mumps->S_inv) {
1365       ierr = VecScatterDestroy(&reuse_mumps->correction_scatter_B);CHKERRQ(ierr);
1366       ierr = VecScatterCreate(pcis->vec1_B,is_B_reuse_mumps,reuse_mumps->solB,NULL,&reuse_mumps->correction_scatter_B);CHKERRQ(ierr);
1367     }
1368     ierr = ISLocalToGlobalMappingApplyIS(sub_schurs->BtoNmap,is_B_reuse_mumps,&tisB_N);CHKERRQ(ierr);
1369     list[0] = pcis->is_I_local;
1370     list[1] = tisB_N;
1371     ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&tisR);CHKERRQ(ierr);
1372     ierr = ISDestroy(&tisB_N);CHKERRQ(ierr);
1373     ierr = ISGetLocalSize(tisR,&n_R);CHKERRQ(ierr);
1374     ierr = ISGetIndices(tisR,&idxs);CHKERRQ(ierr);
1375     ierr = PetscMemcpy(idx_R_local,idxs,n_R*sizeof(PetscInt));CHKERRQ(ierr);
1376     ierr = ISRestoreIndices(tisR,&idxs);CHKERRQ(ierr);
1377     ierr = ISDestroy(&tisR);CHKERRQ(ierr);
1378   }
1379 
1380   /* Block code */
1381   vbs = 1;
1382   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1383   if (bs>1 && !(n_vertices%bs)) {
1384     PetscBool is_blocked = PETSC_TRUE;
1385     PetscInt  *vary;
1386     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1387     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1388     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1389     for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1390     for (i=0; i<n_vertices/bs; i++) {
1391       if (vary[i]!=0 && vary[i]!=bs) {
1392         is_blocked = PETSC_FALSE;
1393         break;
1394       }
1395     }
1396     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1397       vbs = bs;
1398       for (i=0;i<n_R/vbs;i++) {
1399         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1400       }
1401     }
1402     ierr = PetscFree(vary);CHKERRQ(ierr);
1403   }
1404   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1405   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1406 
1407   /* print some info if requested */
1408   if (pcbddc->dbg_flag) {
1409     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1410     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1411     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1412     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1413     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1414     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1415     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1416   }
1417 
1418   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1419   if (!sub_schurs->reuse_mumps) {
1420     IS       is_aux1,is_aux2;
1421     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1422 
1423     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1424     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1425     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1426     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1427     for (i=0; i<n_D; i++) {
1428       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1429     }
1430     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1431     for (i=0, j=0; i<n_R; i++) {
1432       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1433         aux_array1[j++] = i;
1434       }
1435     }
1436     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1437     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1438     for (i=0, j=0; i<n_B; i++) {
1439       if (!PetscBTLookup(bitmask,is_indices[i])) {
1440         aux_array2[j++] = i;
1441       }
1442     }
1443     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1444     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1445     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1446     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1447     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1448 
1449     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1450       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1451       for (i=0, j=0; i<n_R; i++) {
1452         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1453           aux_array1[j++] = i;
1454         }
1455       }
1456       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1457       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1458       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1459     }
1460     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1461     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1462   } else {
1463     IS       tis;
1464     PetscInt schur_size;
1465 
1466     ierr = ISGetLocalSize(is_B_reuse_mumps,&schur_size);CHKERRQ(ierr);
1467     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1468     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,is_B_reuse_mumps,&pcbddc->R_to_B);CHKERRQ(ierr);
1469     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1470     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1471       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1472       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1473       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1474     }
1475   }
1476   ierr = ISDestroy(&is_B_reuse_mumps);CHKERRQ(ierr);
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1483 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1484 {
1485   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1486   PC_IS          *pcis = (PC_IS*)pc->data;
1487   PC             pc_temp;
1488   Mat            A_RR;
1489   MatReuse       reuse;
1490   PetscScalar    m_one = -1.0;
1491   PetscReal      value;
1492   PetscInt       n_D,n_R,ibs,mbs;
1493   PetscBool      use_exact,use_exact_reduced,issbaij;
1494   PetscErrorCode ierr;
1495   /* prefixes stuff */
1496   char           dir_prefix[256],neu_prefix[256],str_level[16];
1497   size_t         len;
1498 
1499   PetscFunctionBegin;
1500 
1501   /* compute prefixes */
1502   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1503   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1504   if (!pcbddc->current_level) {
1505     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1506     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1507     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1508     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1509   } else {
1510     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1511     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1512     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1513     len -= 15; /* remove "pc_bddc_coarse_" */
1514     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1515     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1516     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1517     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1518     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1519     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1520     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1521     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1522   }
1523 
1524   /* DIRICHLET PROBLEM */
1525   if (dirichlet) {
1526     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1527     if (pcbddc->issym) {
1528       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1529     }
1530     /* Matrix for Dirichlet problem is pcis->A_II */
1531     n_D = pcis->n - pcis->n_B;
1532     if (!pcbddc->ksp_D) { /* create object if not yet build */
1533       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1534       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1535       /* default */
1536       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1537       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1538       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1539       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1540       if (issbaij) {
1541         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1542       } else {
1543         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1544       }
1545       /* Allow user's customization */
1546       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1547       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1548     }
1549     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1550     if (sub_schurs->reuse_mumps) {
1551       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1552 
1553       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1554     }
1555     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1556     if (!n_D) {
1557       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1558       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1559     }
1560     /* Set Up KSP for Dirichlet problem of BDDC */
1561     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1562     /* set ksp_D into pcis data */
1563     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1564     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1565     pcis->ksp_D = pcbddc->ksp_D;
1566   }
1567 
1568   /* NEUMANN PROBLEM */
1569   A_RR = 0;
1570   if (neumann) {
1571     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1572     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1573     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1574     if (pcbddc->ksp_R) { /* already created ksp */
1575       PetscInt nn_R;
1576       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1577       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1578       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1579       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1580         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1581         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1582         reuse = MAT_INITIAL_MATRIX;
1583       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1584         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1585           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1586           reuse = MAT_INITIAL_MATRIX;
1587         } else { /* safe to reuse the matrix */
1588           reuse = MAT_REUSE_MATRIX;
1589         }
1590       }
1591       /* last check */
1592       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1593         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1594         reuse = MAT_INITIAL_MATRIX;
1595       }
1596     } else { /* first time, so we need to create the matrix */
1597       reuse = MAT_INITIAL_MATRIX;
1598     }
1599     /* extract A_RR */
1600     if (!sub_schurs->reuse_mumps) {
1601       ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1602       ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1603       if (ibs != mbs) {
1604         Mat newmat;
1605         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1606         ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1607         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1608       } else {
1609         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1610       }
1611       if (pcbddc->issym) {
1612         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1613       }
1614     } else {
1615       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1616 
1617       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1618       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1619       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1620     }
1621     if (!pcbddc->ksp_R) { /* create object if not present */
1622       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1623       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1624       /* default */
1625       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1626       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1627       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1628       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1629       if (issbaij) {
1630         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1631       } else {
1632         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1633       }
1634       /* Allow user's customization */
1635       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1636       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1637     }
1638     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1639     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1640     if (!n_R) {
1641       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1642       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1643     }
1644     /* Reuse MUMPS solver if it is present */
1645     if (sub_schurs->reuse_mumps) {
1646       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1647 
1648       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1649     }
1650     /* Set Up KSP for Neumann problem of BDDC */
1651     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1652   }
1653   /* free Neumann problem's matrix */
1654   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1655 
1656   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1657   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1658     if (pcbddc->dbg_flag) {
1659       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1660       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1661       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1662     }
1663     if (dirichlet) { /* Dirichlet */
1664       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1665       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1666       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1667       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1668       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1669       /* need to be adapted? */
1670       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1671       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1672       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1673       /* print info */
1674       if (pcbddc->dbg_flag) {
1675         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1676         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1677       }
1678       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1679         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1680       }
1681     }
1682     if (neumann) { /* Neumann */
1683       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1684       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1685       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1686       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1687       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1688       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1689       /* need to be adapted? */
1690       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1691       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1692       /* print info */
1693       if (pcbddc->dbg_flag) {
1694         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1695         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1696       }
1697       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1698         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1699       }
1700     }
1701   }
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1707 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1708 {
1709   PetscErrorCode ierr;
1710   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1711 
1712   PetscFunctionBegin;
1713   ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1714   if (!pcbddc->switch_static) {
1715     if (applytranspose && pcbddc->local_auxmat1) {
1716       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1717       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1718     }
1719     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1720     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1721   } else {
1722     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1723     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1724     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1725     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1726     if (applytranspose && pcbddc->local_auxmat1) {
1727       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1728       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1729       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1730       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1731     }
1732   }
1733   if (applytranspose) {
1734     ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1735   } else {
1736     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1737   }
1738   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1739   if (!pcbddc->switch_static) {
1740     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1741     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1742     if (!applytranspose && pcbddc->local_auxmat1) {
1743       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1744       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1745     }
1746   } else {
1747     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1748     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1749     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1750     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1751     if (!applytranspose && pcbddc->local_auxmat1) {
1752       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1753       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1754     }
1755     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1756     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1757     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1758     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1759   }
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1764 #undef __FUNCT__
1765 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1766 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1767 {
1768   PetscErrorCode ierr;
1769   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1770   PC_IS*            pcis = (PC_IS*)  (pc->data);
1771   const PetscScalar zero = 0.0;
1772 
1773   PetscFunctionBegin;
1774   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1775   if (applytranspose) {
1776     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1777     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1778   } else {
1779     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1780     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1781   }
1782   /* start communications from local primal nodes to rhs of coarse solver */
1783   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1784   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1785   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1786 
1787   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1788   /* TODO remove null space when doing multilevel */
1789   if (pcbddc->coarse_ksp) {
1790     Vec rhs,sol;
1791 
1792     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1793     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1794     if (applytranspose) {
1795       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1796     } else {
1797       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1798     }
1799   }
1800 
1801   /* Local solution on R nodes */
1802   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1803     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1804   }
1805 
1806   /* communications from coarse sol to local primal nodes */
1807   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1808   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1809 
1810   /* Sum contributions from two levels */
1811   if (applytranspose) {
1812     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1813     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1814   } else {
1815     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1816     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1817   }
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 #undef __FUNCT__
1822 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1823 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1824 {
1825   PetscErrorCode ierr;
1826   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1827   PetscScalar    *array;
1828   Vec            from,to;
1829 
1830   PetscFunctionBegin;
1831   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1832     from = pcbddc->coarse_vec;
1833     to = pcbddc->vec1_P;
1834     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1835       Vec tvec;
1836 
1837       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1838       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1839       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1840       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1841       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1842       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1843     }
1844   } else { /* from local to global -> put data in coarse right hand side */
1845     from = pcbddc->vec1_P;
1846     to = pcbddc->coarse_vec;
1847   }
1848   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 #undef __FUNCT__
1853 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1854 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1855 {
1856   PetscErrorCode ierr;
1857   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1858   PetscScalar    *array;
1859   Vec            from,to;
1860 
1861   PetscFunctionBegin;
1862   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1863     from = pcbddc->coarse_vec;
1864     to = pcbddc->vec1_P;
1865   } else { /* from local to global -> put data in coarse right hand side */
1866     from = pcbddc->vec1_P;
1867     to = pcbddc->coarse_vec;
1868   }
1869   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1870   if (smode == SCATTER_FORWARD) {
1871     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1872       Vec tvec;
1873 
1874       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1875       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1876       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1877       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1878     }
1879   } else {
1880     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1881      ierr = VecResetArray(from);CHKERRQ(ierr);
1882     }
1883   }
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 /* uncomment for testing purposes */
1888 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1889 #undef __FUNCT__
1890 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1891 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1892 {
1893   PetscErrorCode    ierr;
1894   PC_IS*            pcis = (PC_IS*)(pc->data);
1895   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1896   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1897   /* one and zero */
1898   PetscScalar       one=1.0,zero=0.0;
1899   /* space to store constraints and their local indices */
1900   PetscScalar       *constraints_data;
1901   PetscInt          *constraints_idxs,*constraints_idxs_B;
1902   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1903   PetscInt          *constraints_n;
1904   /* iterators */
1905   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1906   /* BLAS integers */
1907   PetscBLASInt      lwork,lierr;
1908   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1909   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1910   /* reuse */
1911   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1912   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1913   /* change of basis */
1914   PetscBool         qr_needed;
1915   PetscBT           change_basis,qr_needed_idx;
1916   /* auxiliary stuff */
1917   PetscInt          *nnz,*is_indices;
1918   PetscInt          ncc;
1919   /* some quantities */
1920   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1921   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1922 
1923   PetscFunctionBegin;
1924   /* Destroy Mat objects computed previously */
1925   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1926   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1927   /* save info on constraints from previous setup (if any) */
1928   olocal_primal_size = pcbddc->local_primal_size;
1929   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1930   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
1931   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1932   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1933   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1934   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1935 
1936   /* print some info */
1937   if (pcbddc->dbg_flag) {
1938     IS       vertices;
1939     PetscInt nv,nedges,nfaces;
1940     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1941     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1942     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1943     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1944     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1945     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1946     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1947     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1948     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1949   }
1950 
1951   if (!pcbddc->adaptive_selection) {
1952     IS           ISForVertices,*ISForFaces,*ISForEdges;
1953     MatNullSpace nearnullsp;
1954     const Vec    *nearnullvecs;
1955     Vec          *localnearnullsp;
1956     PetscScalar  *array;
1957     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1958     PetscBool    nnsp_has_cnst;
1959     /* LAPACK working arrays for SVD or POD */
1960     PetscBool    skip_lapack,boolforchange;
1961     PetscScalar  *work;
1962     PetscReal    *singular_vals;
1963 #if defined(PETSC_USE_COMPLEX)
1964     PetscReal    *rwork;
1965 #endif
1966 #if defined(PETSC_MISSING_LAPACK_GESVD)
1967     PetscScalar  *temp_basis,*correlation_mat;
1968 #else
1969     PetscBLASInt dummy_int=1;
1970     PetscScalar  dummy_scalar=1.;
1971 #endif
1972 
1973     /* Get index sets for faces, edges and vertices from graph */
1974     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1975     /* free unneeded index sets */
1976     if (!pcbddc->use_vertices) {
1977       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1978     }
1979     if (!pcbddc->use_edges) {
1980       for (i=0;i<n_ISForEdges;i++) {
1981         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1982       }
1983       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1984       n_ISForEdges = 0;
1985     }
1986     if (!pcbddc->use_faces) {
1987       for (i=0;i<n_ISForFaces;i++) {
1988         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1989       }
1990       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1991       n_ISForFaces = 0;
1992     }
1993 
1994 #if defined(PETSC_USE_DEBUG)
1995     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
1996        Also use_change_of_basis should be consistent among processors */
1997     if (pcbddc->NullSpace) {
1998       PetscBool tbool[2],gbool[2];
1999 
2000       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2001         pcbddc->use_change_of_basis = PETSC_TRUE;
2002         if (!ISForEdges) {
2003           pcbddc->use_change_on_faces = PETSC_TRUE;
2004         }
2005       }
2006       tbool[0] = pcbddc->use_change_of_basis;
2007       tbool[1] = pcbddc->use_change_on_faces;
2008       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2009       pcbddc->use_change_of_basis = gbool[0];
2010       pcbddc->use_change_on_faces = gbool[1];
2011     }
2012 #endif
2013 
2014     /* check if near null space is attached to global mat */
2015     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2016     if (nearnullsp) {
2017       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2018       /* remove any stored info */
2019       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2020       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2021       /* store information for BDDC solver reuse */
2022       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2023       pcbddc->onearnullspace = nearnullsp;
2024       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2025       for (i=0;i<nnsp_size;i++) {
2026         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2027       }
2028     } else { /* if near null space is not provided BDDC uses constants by default */
2029       nnsp_size = 0;
2030       nnsp_has_cnst = PETSC_TRUE;
2031     }
2032     /* get max number of constraints on a single cc */
2033     max_constraints = nnsp_size;
2034     if (nnsp_has_cnst) max_constraints++;
2035 
2036     /*
2037          Evaluate maximum storage size needed by the procedure
2038          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2039          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2040          There can be multiple constraints per connected component
2041                                                                                                                                                            */
2042     n_vertices = 0;
2043     if (ISForVertices) {
2044       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2045     }
2046     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2047     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2048 
2049     total_counts = n_ISForFaces+n_ISForEdges;
2050     total_counts *= max_constraints;
2051     total_counts += n_vertices;
2052     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2053 
2054     total_counts = 0;
2055     max_size_of_constraint = 0;
2056     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2057       IS used_is;
2058       if (i<n_ISForEdges) {
2059         used_is = ISForEdges[i];
2060       } else {
2061         used_is = ISForFaces[i-n_ISForEdges];
2062       }
2063       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2064       total_counts += j;
2065       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2066     }
2067     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2068 
2069     /* get local part of global near null space vectors */
2070     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2071     for (k=0;k<nnsp_size;k++) {
2072       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2073       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2074       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2075     }
2076 
2077     /* whether or not to skip lapack calls */
2078     skip_lapack = PETSC_TRUE;
2079     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2080 
2081     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2082     if (!skip_lapack) {
2083       PetscScalar temp_work;
2084 
2085 #if defined(PETSC_MISSING_LAPACK_GESVD)
2086       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2087       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2088       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2089       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2090 #if defined(PETSC_USE_COMPLEX)
2091       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2092 #endif
2093       /* now we evaluate the optimal workspace using query with lwork=-1 */
2094       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2095       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2096       lwork = -1;
2097       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2098 #if !defined(PETSC_USE_COMPLEX)
2099       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2100 #else
2101       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2102 #endif
2103       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2104       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2105 #else /* on missing GESVD */
2106       /* SVD */
2107       PetscInt max_n,min_n;
2108       max_n = max_size_of_constraint;
2109       min_n = max_constraints;
2110       if (max_size_of_constraint < max_constraints) {
2111         min_n = max_size_of_constraint;
2112         max_n = max_constraints;
2113       }
2114       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2115 #if defined(PETSC_USE_COMPLEX)
2116       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2117 #endif
2118       /* now we evaluate the optimal workspace using query with lwork=-1 */
2119       lwork = -1;
2120       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2121       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2122       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2123       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2124 #if !defined(PETSC_USE_COMPLEX)
2125       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2126 #else
2127       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2128 #endif
2129       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2130       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2131 #endif /* on missing GESVD */
2132       /* Allocate optimal workspace */
2133       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2134       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2135     }
2136     /* Now we can loop on constraining sets */
2137     total_counts = 0;
2138     constraints_idxs_ptr[0] = 0;
2139     constraints_data_ptr[0] = 0;
2140     /* vertices */
2141     if (n_vertices) {
2142       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2143       if (nnsp_has_cnst) { /* it considers all possible vertices */
2144         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2145         for (i=0;i<n_vertices;i++) {
2146           constraints_n[total_counts] = 1;
2147           constraints_data[total_counts] = 1.0;
2148           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2149           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2150           total_counts++;
2151         }
2152       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2153         PetscBool used_vertex;
2154         for (i=0;i<n_vertices;i++) {
2155           used_vertex = PETSC_FALSE;
2156           k = 0;
2157           while (!used_vertex && k<nnsp_size) {
2158             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2159             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2160               constraints_n[total_counts] = 1;
2161               constraints_idxs[total_counts] = is_indices[i];
2162               constraints_data[total_counts] = 1.0;
2163               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2164               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2165               total_counts++;
2166               used_vertex = PETSC_TRUE;
2167             }
2168             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2169             k++;
2170           }
2171         }
2172       }
2173       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2174       n_vertices = total_counts;
2175     }
2176 
2177     /* edges and faces */
2178     total_counts_cc = total_counts;
2179     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2180       IS        used_is;
2181       PetscBool idxs_copied = PETSC_FALSE;
2182 
2183       if (ncc<n_ISForEdges) {
2184         used_is = ISForEdges[ncc];
2185         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2186       } else {
2187         used_is = ISForFaces[ncc-n_ISForEdges];
2188         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2189       }
2190       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2191 
2192       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2193       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2194       /* change of basis should not be performed on local periodic nodes */
2195       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2196       if (nnsp_has_cnst) {
2197         PetscScalar quad_value;
2198 
2199         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2200         idxs_copied = PETSC_TRUE;
2201 
2202         if (!pcbddc->use_nnsp_true) {
2203           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2204         } else {
2205           quad_value = 1.0;
2206         }
2207         for (j=0;j<size_of_constraint;j++) {
2208           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2209         }
2210         temp_constraints++;
2211         total_counts++;
2212       }
2213       for (k=0;k<nnsp_size;k++) {
2214         PetscReal real_value;
2215         PetscScalar *ptr_to_data;
2216 
2217         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2218         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2219         for (j=0;j<size_of_constraint;j++) {
2220           ptr_to_data[j] = array[is_indices[j]];
2221         }
2222         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2223         /* check if array is null on the connected component */
2224         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2225         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2226         if (real_value > 0.0) { /* keep indices and values */
2227           temp_constraints++;
2228           total_counts++;
2229           if (!idxs_copied) {
2230             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2231             idxs_copied = PETSC_TRUE;
2232           }
2233         }
2234       }
2235       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2236       valid_constraints = temp_constraints;
2237       if (!pcbddc->use_nnsp_true && temp_constraints) {
2238         if (temp_constraints == 1) { /* just normalize the constraint */
2239           PetscScalar norm,*ptr_to_data;
2240 
2241           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2242           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2243           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2244           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2245           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2246         } else { /* perform SVD */
2247           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2248           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2249 
2250 #if defined(PETSC_MISSING_LAPACK_GESVD)
2251           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2252              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2253              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2254                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2255                 from that computed using LAPACKgesvd
2256              -> This is due to a different computation of eigenvectors in LAPACKheev
2257              -> The quality of the POD-computed basis will be the same */
2258           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2259           /* Store upper triangular part of correlation matrix */
2260           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2261           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2262           for (j=0;j<temp_constraints;j++) {
2263             for (k=0;k<j+1;k++) {
2264               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2265             }
2266           }
2267           /* compute eigenvalues and eigenvectors of correlation matrix */
2268           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2269           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2270 #if !defined(PETSC_USE_COMPLEX)
2271           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2272 #else
2273           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2274 #endif
2275           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2276           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2277           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2278           j = 0;
2279           while (j < temp_constraints && singular_vals[j] < tol) j++;
2280           total_counts = total_counts-j;
2281           valid_constraints = temp_constraints-j;
2282           /* scale and copy POD basis into used quadrature memory */
2283           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2284           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2285           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2286           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2287           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2288           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2289           if (j<temp_constraints) {
2290             PetscInt ii;
2291             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2292             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2293             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2294             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2295             for (k=0;k<temp_constraints-j;k++) {
2296               for (ii=0;ii<size_of_constraint;ii++) {
2297                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2298               }
2299             }
2300           }
2301 #else  /* on missing GESVD */
2302           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2303           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2304           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2305           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2306 #if !defined(PETSC_USE_COMPLEX)
2307           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2308 #else
2309           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2310 #endif
2311           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2312           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2313           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2314           k = temp_constraints;
2315           if (k > size_of_constraint) k = size_of_constraint;
2316           j = 0;
2317           while (j < k && singular_vals[k-j-1] < tol) j++;
2318           valid_constraints = k-j;
2319           total_counts = total_counts-temp_constraints+valid_constraints;
2320 #endif /* on missing GESVD */
2321         }
2322       }
2323       /* update pointers information */
2324       if (valid_constraints) {
2325         constraints_n[total_counts_cc] = valid_constraints;
2326         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2327         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2328         /* set change_of_basis flag */
2329         if (boolforchange) {
2330           PetscBTSet(change_basis,total_counts_cc);
2331         }
2332         total_counts_cc++;
2333       }
2334     }
2335     /* free workspace */
2336     if (!skip_lapack) {
2337       ierr = PetscFree(work);CHKERRQ(ierr);
2338 #if defined(PETSC_USE_COMPLEX)
2339       ierr = PetscFree(rwork);CHKERRQ(ierr);
2340 #endif
2341       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2342 #if defined(PETSC_MISSING_LAPACK_GESVD)
2343       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2344       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2345 #endif
2346     }
2347     for (k=0;k<nnsp_size;k++) {
2348       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2349     }
2350     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2351     /* free index sets of faces, edges and vertices */
2352     for (i=0;i<n_ISForFaces;i++) {
2353       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2354     }
2355     if (n_ISForFaces) {
2356       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2357     }
2358     for (i=0;i<n_ISForEdges;i++) {
2359       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2360     }
2361     if (n_ISForEdges) {
2362       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2363     }
2364     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2365   } else {
2366     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2367 
2368     total_counts = 0;
2369     n_vertices = 0;
2370     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2371       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2372     }
2373     max_constraints = 0;
2374     total_counts_cc = 0;
2375     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2376       total_counts += pcbddc->adaptive_constraints_n[i];
2377       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2378       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2379     }
2380     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2381     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2382     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2383     constraints_data = pcbddc->adaptive_constraints_data;
2384     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2385     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2386     total_counts_cc = 0;
2387     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2388       if (pcbddc->adaptive_constraints_n[i]) {
2389         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2390       }
2391     }
2392 #if 0
2393     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2394     for (i=0;i<total_counts_cc;i++) {
2395       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2396       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2397       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2398         printf(" %d",constraints_idxs[j]);
2399       }
2400       printf("\n");
2401       printf("number of cc: %d\n",constraints_n[i]);
2402     }
2403     for (i=0;i<n_vertices;i++) {
2404       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2405     }
2406     for (i=0;i<sub_schurs->n_subs;i++) {
2407       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2408     }
2409 #endif
2410 
2411     max_size_of_constraint = 0;
2412     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2413     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2414     /* Change of basis */
2415     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2416     if (pcbddc->use_change_of_basis) {
2417       for (i=0;i<sub_schurs->n_subs;i++) {
2418         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2419           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2420         }
2421       }
2422     }
2423   }
2424   pcbddc->local_primal_size = total_counts;
2425   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2426 
2427   /* map constraints_idxs in boundary numbering */
2428   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2429   if (i != constraints_idxs_ptr[total_counts_cc]) {
2430     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2431   }
2432 
2433   /* Create constraint matrix */
2434   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2435   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2436   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2437 
2438   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2439   /* determine if a QR strategy is needed for change of basis */
2440   qr_needed = PETSC_FALSE;
2441   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2442   total_primal_vertices=0;
2443   pcbddc->local_primal_size_cc = 0;
2444   for (i=0;i<total_counts_cc;i++) {
2445     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2446     if (size_of_constraint == 1) {
2447       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2448       pcbddc->local_primal_size_cc += 1;
2449     } else if (PetscBTLookup(change_basis,i)) {
2450       for (k=0;k<constraints_n[i];k++) {
2451         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2452       }
2453       pcbddc->local_primal_size_cc += constraints_n[i];
2454       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2455         PetscBTSet(qr_needed_idx,i);
2456         qr_needed = PETSC_TRUE;
2457       }
2458     } else {
2459       pcbddc->local_primal_size_cc += 1;
2460     }
2461   }
2462   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2463   pcbddc->n_vertices = total_primal_vertices;
2464   /* permute indices in order to have a sorted set of vertices */
2465   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2466 
2467   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2468   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2469   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2470 
2471   /* nonzero structure of constraint matrix */
2472   /* and get reference dof for local constraints */
2473   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2474   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2475 
2476   j = total_primal_vertices;
2477   total_counts = total_primal_vertices;
2478   cum = total_primal_vertices;
2479   for (i=n_vertices;i<total_counts_cc;i++) {
2480     if (!PetscBTLookup(change_basis,i)) {
2481       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2482       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2483       cum++;
2484       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2485       for (k=0;k<constraints_n[i];k++) {
2486         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2487         nnz[j+k] = size_of_constraint;
2488       }
2489       j += constraints_n[i];
2490     }
2491   }
2492   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2493   ierr = PetscFree(nnz);CHKERRQ(ierr);
2494 
2495   /* set values in constraint matrix */
2496   for (i=0;i<total_primal_vertices;i++) {
2497     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2498   }
2499   total_counts = total_primal_vertices;
2500   for (i=n_vertices;i<total_counts_cc;i++) {
2501     if (!PetscBTLookup(change_basis,i)) {
2502       PetscInt *cols;
2503 
2504       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2505       cols = constraints_idxs+constraints_idxs_ptr[i];
2506       for (k=0;k<constraints_n[i];k++) {
2507         PetscInt    row = total_counts+k;
2508         PetscScalar *vals;
2509 
2510         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2511         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2512       }
2513       total_counts += constraints_n[i];
2514     }
2515   }
2516   /* assembling */
2517   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2518   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2519 
2520   /*
2521   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2522   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2523   */
2524   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2525   if (pcbddc->use_change_of_basis) {
2526     /* dual and primal dofs on a single cc */
2527     PetscInt     dual_dofs,primal_dofs;
2528     /* working stuff for GEQRF */
2529     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2530     PetscBLASInt lqr_work;
2531     /* working stuff for UNGQR */
2532     PetscScalar  *gqr_work,lgqr_work_t;
2533     PetscBLASInt lgqr_work;
2534     /* working stuff for TRTRS */
2535     PetscScalar  *trs_rhs;
2536     PetscBLASInt Blas_NRHS;
2537     /* pointers for values insertion into change of basis matrix */
2538     PetscInt     *start_rows,*start_cols;
2539     PetscScalar  *start_vals;
2540     /* working stuff for values insertion */
2541     PetscBT      is_primal;
2542     PetscInt     *aux_primal_numbering_B;
2543     /* matrix sizes */
2544     PetscInt     global_size,local_size;
2545     /* temporary change of basis */
2546     Mat          localChangeOfBasisMatrix;
2547     /* extra space for debugging */
2548     PetscScalar  *dbg_work;
2549 
2550     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2551     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2552     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2553     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2554     /* nonzeros for local mat */
2555     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2556     for (i=0;i<pcis->n;i++) nnz[i]=1;
2557     for (i=n_vertices;i<total_counts_cc;i++) {
2558       if (PetscBTLookup(change_basis,i)) {
2559         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2560         if (PetscBTLookup(qr_needed_idx,i)) {
2561           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2562         } else {
2563           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2564           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2565         }
2566       }
2567     }
2568     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2569     ierr = PetscFree(nnz);CHKERRQ(ierr);
2570     /* Set initial identity in the matrix */
2571     for (i=0;i<pcis->n;i++) {
2572       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2573     }
2574 
2575     if (pcbddc->dbg_flag) {
2576       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2577       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2578     }
2579 
2580 
2581     /* Now we loop on the constraints which need a change of basis */
2582     /*
2583        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2584        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2585 
2586        Basic blocks of change of basis matrix T computed by
2587 
2588           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2589 
2590             | 1        0   ...        0         s_1/S |
2591             | 0        1   ...        0         s_2/S |
2592             |              ...                        |
2593             | 0        ...            1     s_{n-1}/S |
2594             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2595 
2596             with S = \sum_{i=1}^n s_i^2
2597             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2598                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2599 
2600           - QR decomposition of constraints otherwise
2601     */
2602     if (qr_needed) {
2603       /* space to store Q */
2604       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2605       /* first we issue queries for optimal work */
2606       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2607       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2608       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2609       lqr_work = -1;
2610       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2611       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2612       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2613       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2614       lgqr_work = -1;
2615       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2616       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2617       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2618       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2619       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2620       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2621       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2622       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2623       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2624       /* array to store scaling factors for reflectors */
2625       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2626       /* array to store rhs and solution of triangular solver */
2627       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2628       /* allocating workspace for check */
2629       if (pcbddc->dbg_flag) {
2630         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2631       }
2632     }
2633     /* array to store whether a node is primal or not */
2634     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2635     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2636     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2637     if (i != total_primal_vertices) {
2638       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2639     }
2640     for (i=0;i<total_primal_vertices;i++) {
2641       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2642     }
2643     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2644 
2645     /* loop on constraints and see whether or not they need a change of basis and compute it */
2646     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2647       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2648       if (PetscBTLookup(change_basis,total_counts)) {
2649         /* get constraint info */
2650         primal_dofs = constraints_n[total_counts];
2651         dual_dofs = size_of_constraint-primal_dofs;
2652 
2653         if (pcbddc->dbg_flag) {
2654           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2655         }
2656 
2657         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2658 
2659           /* copy quadrature constraints for change of basis check */
2660           if (pcbddc->dbg_flag) {
2661             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2662           }
2663           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2664           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2665 
2666           /* compute QR decomposition of constraints */
2667           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2668           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2669           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2670           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2671           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2672           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2673           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2674 
2675           /* explictly compute R^-T */
2676           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2677           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2678           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2679           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2680           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2681           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2682           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2683           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2684           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2685           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2686 
2687           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2688           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2689           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2690           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2691           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2692           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2693           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2694           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2695           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2696 
2697           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2698              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2699              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2700           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2701           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2702           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2703           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2704           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2705           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2706           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2707           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2708           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2709           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2710 
2711           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2712           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2713           /* insert cols for primal dofs */
2714           for (j=0;j<primal_dofs;j++) {
2715             start_vals = &qr_basis[j*size_of_constraint];
2716             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2717             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2718           }
2719           /* insert cols for dual dofs */
2720           for (j=0,k=0;j<dual_dofs;k++) {
2721             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2722               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2723               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2724               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2725               j++;
2726             }
2727           }
2728 
2729           /* check change of basis */
2730           if (pcbddc->dbg_flag) {
2731             PetscInt   ii,jj;
2732             PetscBool valid_qr=PETSC_TRUE;
2733             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2734             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2735             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2736             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2737             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2738             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2739             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2740             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2741             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2742             for (jj=0;jj<size_of_constraint;jj++) {
2743               for (ii=0;ii<primal_dofs;ii++) {
2744                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2745                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2746               }
2747             }
2748             if (!valid_qr) {
2749               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2750               for (jj=0;jj<size_of_constraint;jj++) {
2751                 for (ii=0;ii<primal_dofs;ii++) {
2752                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2753                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2754                   }
2755                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2756                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2757                   }
2758                 }
2759               }
2760             } else {
2761               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2762             }
2763           }
2764         } else { /* simple transformation block */
2765           PetscInt    row,col;
2766           PetscScalar val,norm;
2767 
2768           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2769           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2770           for (j=0;j<size_of_constraint;j++) {
2771             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2772             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2773             if (!PetscBTLookup(is_primal,row_B)) {
2774               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2775               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2776               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2777             } else {
2778               for (k=0;k<size_of_constraint;k++) {
2779                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2780                 if (row != col) {
2781                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2782                 } else {
2783                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2784                 }
2785                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2786               }
2787             }
2788           }
2789           if (pcbddc->dbg_flag) {
2790             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2791           }
2792         }
2793       } else {
2794         if (pcbddc->dbg_flag) {
2795           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2796         }
2797       }
2798     }
2799 
2800     /* free workspace */
2801     if (qr_needed) {
2802       if (pcbddc->dbg_flag) {
2803         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2804       }
2805       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2806       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2807       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2808       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2809       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2810     }
2811     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2812     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2813     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2814 
2815     /* assembling of global change of variable */
2816     {
2817       Mat      tmat;
2818       PetscInt bs;
2819 
2820       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2821       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2822       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2823       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2824       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2825       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2826       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2827       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2828       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2829       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2830       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2831       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2832       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2833       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2834       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2835       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2836       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2837       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2838     }
2839     /* check */
2840     if (pcbddc->dbg_flag) {
2841       PetscReal error;
2842       Vec       x,x_change;
2843 
2844       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2845       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2846       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2847       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2848       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2849       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2850       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2851       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2852       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2853       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2854       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2855       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2856       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2857       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2858       ierr = VecDestroy(&x);CHKERRQ(ierr);
2859       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2860     }
2861 
2862     /* adapt sub_schurs computed (if any) */
2863     if (pcbddc->use_deluxe_scaling) {
2864       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2865       if (sub_schurs->S_Ej_all) {
2866         Mat S_new,tmat;
2867         IS is_all_N;
2868 
2869         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2870         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2871         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2872         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2873         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2874         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2875         sub_schurs->S_Ej_all = S_new;
2876         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2877         if (sub_schurs->sum_S_Ej_all) {
2878           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2879           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2880           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2881           sub_schurs->sum_S_Ej_all = S_new;
2882           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2883         }
2884         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2885       }
2886     }
2887     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2888   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2889     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2890     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2891   }
2892 
2893   /* set up change of basis context */
2894   if (pcbddc->ChangeOfBasisMatrix) {
2895     PCBDDCChange_ctx change_ctx;
2896 
2897     if (!pcbddc->new_global_mat) {
2898       PetscInt global_size,local_size;
2899 
2900       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2901       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2902       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2903       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2904       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2905       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2906       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2907       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2908       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2909     } else {
2910       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2911       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2912       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2913     }
2914     if (!pcbddc->user_ChangeOfBasisMatrix) {
2915       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2916       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2917     } else {
2918       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2919       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2920     }
2921     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2922     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2923     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2924     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2925   }
2926 
2927   /* check if a new primal space has been introduced */
2928   pcbddc->new_primal_space_local = PETSC_TRUE;
2929   if (olocal_primal_size == pcbddc->local_primal_size) {
2930     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2931     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2932     if (!pcbddc->new_primal_space_local) {
2933       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2934       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2935     }
2936   }
2937   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
2938   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2939   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2940 
2941   /* flush dbg viewer */
2942   if (pcbddc->dbg_flag) {
2943     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2944   }
2945 
2946   /* free workspace */
2947   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2948   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2949   if (!pcbddc->adaptive_selection) {
2950     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2951     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2952   } else {
2953     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2954                       pcbddc->adaptive_constraints_idxs_ptr,
2955                       pcbddc->adaptive_constraints_data_ptr,
2956                       pcbddc->adaptive_constraints_idxs,
2957                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2958     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
2959     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
2960   }
2961   PetscFunctionReturn(0);
2962 }
2963 
2964 #undef __FUNCT__
2965 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2966 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2967 {
2968   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2969   PC_IS       *pcis = (PC_IS*)pc->data;
2970   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2971   PetscInt    ierr,i,vertex_size,N;
2972   PetscViewer viewer=pcbddc->dbg_viewer;
2973 
2974   PetscFunctionBegin;
2975   /* Reset previously computed graph */
2976   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2977   /* Init local Graph struct */
2978   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2979   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
2980 
2981   /* Check validity of the csr graph passed in by the user */
2982   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2983     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2984   }
2985 
2986   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2987   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2988     PetscInt  *xadj,*adjncy;
2989     PetscInt  nvtxs;
2990     PetscBool flg_row=PETSC_FALSE;
2991 
2992     if (pcbddc->use_local_adj) {
2993 
2994       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2995       if (flg_row) {
2996         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2997         pcbddc->computed_rowadj = PETSC_TRUE;
2998       }
2999       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3000     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3001       IS                     is_dummy;
3002       ISLocalToGlobalMapping l2gmap_dummy;
3003       PetscInt               j,sum;
3004       PetscInt               *cxadj,*cadjncy;
3005       const PetscInt         *idxs;
3006       PCBDDCGraph            graph;
3007       PetscBT                is_on_boundary;
3008 
3009       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3010       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3011       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3012       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3013       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3014       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3015       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3016       if (flg_row) {
3017         graph->xadj = xadj;
3018         graph->adjncy = adjncy;
3019       }
3020       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3021       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3022       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3023 
3024       if (pcbddc->dbg_flag) {
3025         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3026         for (i=0;i<graph->ncc;i++) {
3027           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3028         }
3029       }
3030 
3031       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3032       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3033       for (i=0;i<pcis->n_B;i++) {
3034         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3035       }
3036       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3037 
3038       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3039       sum = 0;
3040       for (i=0;i<graph->ncc;i++) {
3041         PetscInt sizecc = 0;
3042         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3043           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3044             sizecc++;
3045           }
3046         }
3047         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3048           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3049             cxadj[graph->queue[j]] = sizecc;
3050           }
3051         }
3052         sum += sizecc*sizecc;
3053       }
3054       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3055       sum = 0;
3056       for (i=0;i<pcis->n;i++) {
3057         PetscInt temp = cxadj[i];
3058         cxadj[i] = sum;
3059         sum += temp;
3060       }
3061       cxadj[pcis->n] = sum;
3062       for (i=0;i<graph->ncc;i++) {
3063         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3064           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3065             PetscInt k,sizecc = 0;
3066             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3067               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3068                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3069                 sizecc++;
3070               }
3071             }
3072           }
3073         }
3074       }
3075       if (sum) {
3076         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3077       } else {
3078         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3079         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3080       }
3081       graph->xadj = 0;
3082       graph->adjncy = 0;
3083       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3084       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3085     }
3086   }
3087   if (pcbddc->dbg_flag) {
3088     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3089   }
3090 
3091   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3092   vertex_size = 1;
3093   if (pcbddc->user_provided_isfordofs) {
3094     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3095       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3096       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3097         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3098         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3099       }
3100       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3101       pcbddc->n_ISForDofs = 0;
3102       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3103     }
3104     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3105     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3106   } else {
3107     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3108       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3109       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3110       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3111         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3112       }
3113     }
3114   }
3115 
3116   /* Setup of Graph */
3117   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3118     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3119   }
3120   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3121     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3122   }
3123   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3124 
3125   /* Graph's connected components analysis */
3126   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3127 
3128   /* print some info to stdout */
3129   if (pcbddc->dbg_flag) {
3130     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3131   }
3132 
3133   /* mark topography has done */
3134   pcbddc->recompute_topography = PETSC_FALSE;
3135   PetscFunctionReturn(0);
3136 }
3137 
3138 #undef __FUNCT__
3139 #define __FUNCT__ "PCBDDCSubsetNumbering"
3140 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3141 {
3142   Vec            local_vec,global_vec;
3143   IS             seqis,paris;
3144   VecScatter     scatter_ctx;
3145   PetscScalar    *array;
3146   PetscInt       *temp_global_dofs;
3147   PetscScalar    globalsum;
3148   PetscInt       i,j,s;
3149   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3150   PetscMPIInt    rank_prec_comm,size_prec_comm;
3151   PetscInt       *dof_sizes,*dof_displs;
3152   PetscBool      first_found;
3153   PetscErrorCode ierr;
3154 
3155   PetscFunctionBegin;
3156   /* mpi buffers */
3157   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3158   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3159   j = ( !rank_prec_comm ? size_prec_comm : 0);
3160   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3161   /* get maximum size of subset */
3162   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3163   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3164   max_local = 0;
3165   for (i=0;i<n_local_dofs;i++) {
3166     if (max_local < temp_global_dofs[i] ) {
3167       max_local = temp_global_dofs[i];
3168     }
3169   }
3170   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3171   max_global++;
3172   max_local = 0;
3173   for (i=0;i<n_local_dofs;i++) {
3174     if (max_local < local_dofs[i] ) {
3175       max_local = local_dofs[i];
3176     }
3177   }
3178   max_local++;
3179   /* allocate workspace */
3180   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3181   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3182   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3183   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3184   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3185   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3186   /* create scatter */
3187   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3188   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3189   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3190   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3191   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3192   /* init array */
3193   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3194   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3195   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3196   if (local_dofs_mult) {
3197     for (i=0;i<n_local_dofs;i++) {
3198       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3199     }
3200   } else {
3201     for (i=0;i<n_local_dofs;i++) {
3202       array[local_dofs[i]]=1.0;
3203     }
3204   }
3205   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3206   /* scatter into global vec and get total number of global dofs */
3207   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3208   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3209   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3210   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3211   /* Fill global_vec with cumulative function for global numbering */
3212   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3213   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3214   nlocals = 0;
3215   first_index = -1;
3216   first_found = PETSC_FALSE;
3217   for (i=0;i<s;i++) {
3218     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3219       first_found = PETSC_TRUE;
3220       first_index = i;
3221     }
3222     nlocals += (PetscInt)PetscRealPart(array[i]);
3223   }
3224   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3225   if (!rank_prec_comm) {
3226     dof_displs[0]=0;
3227     for (i=1;i<size_prec_comm;i++) {
3228       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3229     }
3230   }
3231   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3232   if (first_found) {
3233     array[first_index] += (PetscScalar)nlocals;
3234     old_index = first_index;
3235     for (i=first_index+1;i<s;i++) {
3236       if (PetscRealPart(array[i]) > 0.1) {
3237         array[i] += array[old_index];
3238         old_index = i;
3239       }
3240     }
3241   }
3242   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3243   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3244   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3245   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3246   /* get global ordering of local dofs */
3247   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3248   if (local_dofs_mult) {
3249     for (i=0;i<n_local_dofs;i++) {
3250       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3251     }
3252   } else {
3253     for (i=0;i<n_local_dofs;i++) {
3254       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3255     }
3256   }
3257   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3258   /* free workspace */
3259   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3260   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3261   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3262   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3263   /* return pointer to global ordering of local dofs */
3264   *global_numbering_subset = temp_global_dofs;
3265   PetscFunctionReturn(0);
3266 }
3267 
3268 #undef __FUNCT__
3269 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3270 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3271 {
3272   PetscInt       i,j;
3273   PetscScalar    *alphas;
3274   PetscErrorCode ierr;
3275 
3276   PetscFunctionBegin;
3277   /* this implements stabilized Gram-Schmidt */
3278   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3279   for (i=0;i<n;i++) {
3280     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3281     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3282     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3283   }
3284   ierr = PetscFree(alphas);CHKERRQ(ierr);
3285   PetscFunctionReturn(0);
3286 }
3287 
3288 #undef __FUNCT__
3289 #define __FUNCT__ "MatISGetSubassemblingPattern"
3290 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3291 {
3292   Mat             subdomain_adj;
3293   IS              new_ranks,ranks_send_to;
3294   MatPartitioning partitioner;
3295   Mat_IS          *matis;
3296   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3297   PetscInt        prank;
3298   PetscMPIInt     size,rank,color;
3299   PetscInt        *xadj,*adjncy,*oldranks;
3300   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3301   PetscInt        i,local_size,threshold=0;
3302   PetscErrorCode  ierr;
3303   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3304   PetscSubcomm    subcomm;
3305 
3306   PetscFunctionBegin;
3307   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3308   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3309   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3310 
3311   /* Get info on mapping */
3312   matis = (Mat_IS*)(mat->data);
3313   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3314   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3315 
3316   /* build local CSR graph of subdomains' connectivity */
3317   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3318   xadj[0] = 0;
3319   xadj[1] = PetscMax(n_neighs-1,0);
3320   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3321   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3322 
3323   if (threshold) {
3324     PetscInt xadj_count = 0;
3325     for (i=1;i<n_neighs;i++) {
3326       if (n_shared[i] > threshold) {
3327         adjncy[xadj_count] = neighs[i];
3328         adjncy_wgt[xadj_count] = n_shared[i];
3329         xadj_count++;
3330       }
3331     }
3332     xadj[1] = xadj_count;
3333   } else {
3334     if (xadj[1]) {
3335       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3336       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3337     }
3338   }
3339   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3340   if (use_square) {
3341     for (i=0;i<xadj[1];i++) {
3342       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3343     }
3344   }
3345   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3346 
3347   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3348 
3349   /*
3350     Restrict work on active processes only.
3351   */
3352   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3353   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3354   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3355   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3356   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3357   if (color) {
3358     ierr = PetscFree(xadj);CHKERRQ(ierr);
3359     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3360     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3361   } else {
3362     PetscInt coarsening_ratio;
3363     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3364     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3365     prank = rank;
3366     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3367     /*
3368     for (i=0;i<size;i++) {
3369       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3370     }
3371     */
3372     for (i=0;i<xadj[1];i++) {
3373       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3374     }
3375     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3376     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3377     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3378 
3379     /* Partition */
3380     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3381     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3382     if (use_vwgt) {
3383       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3384       v_wgt[0] = local_size;
3385       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3386     }
3387     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3388     coarsening_ratio = size/n_subdomains;
3389     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3390     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3391     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3392     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3393 
3394     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3395     if (contiguous) {
3396       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3397     } else {
3398       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3399     }
3400     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3401     /* clean up */
3402     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3403     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3404     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3405     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3406   }
3407   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3408 
3409   /* assemble parallel IS for sends */
3410   i = 1;
3411   if (color) i=0;
3412   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3413 
3414   /* get back IS */
3415   *is_sends = ranks_send_to;
3416   PetscFunctionReturn(0);
3417 }
3418 
3419 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3420 
3421 #undef __FUNCT__
3422 #define __FUNCT__ "MatISSubassemble"
3423 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3424 {
3425   Mat                    local_mat;
3426   Mat_IS                 *matis;
3427   IS                     is_sends_internal;
3428   PetscInt               rows,cols,new_local_rows;
3429   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3430   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3431   ISLocalToGlobalMapping l2gmap;
3432   PetscInt*              l2gmap_indices;
3433   const PetscInt*        is_indices;
3434   MatType                new_local_type;
3435   /* buffers */
3436   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3437   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3438   PetscInt               *recv_buffer_idxs_local;
3439   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3440   /* MPI */
3441   MPI_Comm               comm,comm_n;
3442   PetscSubcomm           subcomm;
3443   PetscMPIInt            n_sends,n_recvs,commsize;
3444   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3445   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3446   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3447   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3448   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3449   PetscErrorCode         ierr;
3450 
3451   PetscFunctionBegin;
3452   /* TODO: add missing checks */
3453   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3454   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3455   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3456   PetscValidLogicalCollectiveInt(mat,nis,7);
3457   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3458   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3459   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3460   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3461   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3462   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3463   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3464   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3465     PetscInt mrows,mcols,mnrows,mncols;
3466     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3467     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3468     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3469     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3470     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3471     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3472   }
3473   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3474   PetscValidLogicalCollectiveInt(mat,bs,0);
3475   /* prepare IS for sending if not provided */
3476   if (!is_sends) {
3477     PetscBool pcontig = PETSC_TRUE;
3478     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3479     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3480   } else {
3481     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3482     is_sends_internal = is_sends;
3483   }
3484 
3485   /* get pointer of MATIS data */
3486   matis = (Mat_IS*)mat->data;
3487 
3488   /* get comm */
3489   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3490 
3491   /* compute number of sends */
3492   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3493   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3494 
3495   /* compute number of receives */
3496   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3497   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3498   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3499   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3500   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3501   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3502   ierr = PetscFree(iflags);CHKERRQ(ierr);
3503 
3504   /* restrict comm if requested */
3505   subcomm = 0;
3506   destroy_mat = PETSC_FALSE;
3507   if (restrict_comm) {
3508     PetscMPIInt color,subcommsize;
3509 
3510     color = 0;
3511     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3512     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3513     subcommsize = commsize - subcommsize;
3514     /* check if reuse has been requested */
3515     if (reuse == MAT_REUSE_MATRIX) {
3516       if (*mat_n) {
3517         PetscMPIInt subcommsize2;
3518         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3519         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3520         comm_n = PetscObjectComm((PetscObject)*mat_n);
3521       } else {
3522         comm_n = PETSC_COMM_SELF;
3523       }
3524     } else { /* MAT_INITIAL_MATRIX */
3525       PetscMPIInt rank;
3526 
3527       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3528       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3529       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3530       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3531       comm_n = PetscSubcommChild(subcomm);
3532     }
3533     /* flag to destroy *mat_n if not significative */
3534     if (color) destroy_mat = PETSC_TRUE;
3535   } else {
3536     comm_n = comm;
3537   }
3538 
3539   /* prepare send/receive buffers */
3540   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3541   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3542   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3543   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3544   if (nis) {
3545     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3546   }
3547 
3548   /* Get data from local matrices */
3549   if (!isdense) {
3550     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3551     /* TODO: See below some guidelines on how to prepare the local buffers */
3552     /*
3553        send_buffer_vals should contain the raw values of the local matrix
3554        send_buffer_idxs should contain:
3555        - MatType_PRIVATE type
3556        - PetscInt        size_of_l2gmap
3557        - PetscInt        global_row_indices[size_of_l2gmap]
3558        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3559     */
3560   } else {
3561     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3562     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3563     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3564     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3565     send_buffer_idxs[1] = i;
3566     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3567     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3568     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3569     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3570     for (i=0;i<n_sends;i++) {
3571       ilengths_vals[is_indices[i]] = len*len;
3572       ilengths_idxs[is_indices[i]] = len+2;
3573     }
3574   }
3575   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3576   /* additional is (if any) */
3577   if (nis) {
3578     PetscMPIInt psum;
3579     PetscInt j;
3580     for (j=0,psum=0;j<nis;j++) {
3581       PetscInt plen;
3582       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3583       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3584       psum += len+1; /* indices + lenght */
3585     }
3586     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3587     for (j=0,psum=0;j<nis;j++) {
3588       PetscInt plen;
3589       const PetscInt *is_array_idxs;
3590       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3591       send_buffer_idxs_is[psum] = plen;
3592       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3593       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3594       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3595       psum += plen+1; /* indices + lenght */
3596     }
3597     for (i=0;i<n_sends;i++) {
3598       ilengths_idxs_is[is_indices[i]] = psum;
3599     }
3600     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3601   }
3602 
3603   buf_size_idxs = 0;
3604   buf_size_vals = 0;
3605   buf_size_idxs_is = 0;
3606   for (i=0;i<n_recvs;i++) {
3607     buf_size_idxs += (PetscInt)olengths_idxs[i];
3608     buf_size_vals += (PetscInt)olengths_vals[i];
3609     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3610   }
3611   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3612   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3613   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3614 
3615   /* get new tags for clean communications */
3616   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3617   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3618   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3619 
3620   /* allocate for requests */
3621   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3622   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3623   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3624   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3625   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3626   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3627 
3628   /* communications */
3629   ptr_idxs = recv_buffer_idxs;
3630   ptr_vals = recv_buffer_vals;
3631   ptr_idxs_is = recv_buffer_idxs_is;
3632   for (i=0;i<n_recvs;i++) {
3633     source_dest = onodes[i];
3634     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3635     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3636     ptr_idxs += olengths_idxs[i];
3637     ptr_vals += olengths_vals[i];
3638     if (nis) {
3639       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3640       ptr_idxs_is += olengths_idxs_is[i];
3641     }
3642   }
3643   for (i=0;i<n_sends;i++) {
3644     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3645     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3646     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3647     if (nis) {
3648       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3649     }
3650   }
3651   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3652   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3653 
3654   /* assemble new l2g map */
3655   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3656   ptr_idxs = recv_buffer_idxs;
3657   new_local_rows = 0;
3658   for (i=0;i<n_recvs;i++) {
3659     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3660     ptr_idxs += olengths_idxs[i];
3661   }
3662   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3663   ptr_idxs = recv_buffer_idxs;
3664   new_local_rows = 0;
3665   for (i=0;i<n_recvs;i++) {
3666     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3667     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3668     ptr_idxs += olengths_idxs[i];
3669   }
3670   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3671   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3672   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3673 
3674   /* infer new local matrix type from received local matrices type */
3675   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3676   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3677   if (n_recvs) {
3678     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3679     ptr_idxs = recv_buffer_idxs;
3680     for (i=0;i<n_recvs;i++) {
3681       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3682         new_local_type_private = MATAIJ_PRIVATE;
3683         break;
3684       }
3685       ptr_idxs += olengths_idxs[i];
3686     }
3687     switch (new_local_type_private) {
3688       case MATDENSE_PRIVATE:
3689         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3690           new_local_type = MATSEQAIJ;
3691           bs = 1;
3692         } else { /* if I receive only 1 dense matrix */
3693           new_local_type = MATSEQDENSE;
3694           bs = 1;
3695         }
3696         break;
3697       case MATAIJ_PRIVATE:
3698         new_local_type = MATSEQAIJ;
3699         bs = 1;
3700         break;
3701       case MATBAIJ_PRIVATE:
3702         new_local_type = MATSEQBAIJ;
3703         break;
3704       case MATSBAIJ_PRIVATE:
3705         new_local_type = MATSEQSBAIJ;
3706         break;
3707       default:
3708         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3709         break;
3710     }
3711   } else { /* by default, new_local_type is seqdense */
3712     new_local_type = MATSEQDENSE;
3713     bs = 1;
3714   }
3715 
3716   /* create MATIS object if needed */
3717   if (reuse == MAT_INITIAL_MATRIX) {
3718     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3719     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3720   } else {
3721     /* it also destroys the local matrices */
3722     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3723   }
3724   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3725   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3726 
3727   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3728 
3729   /* Global to local map of received indices */
3730   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3731   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3732   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3733 
3734   /* restore attributes -> type of incoming data and its size */
3735   buf_size_idxs = 0;
3736   for (i=0;i<n_recvs;i++) {
3737     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3738     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3739     buf_size_idxs += (PetscInt)olengths_idxs[i];
3740   }
3741   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3742 
3743   /* set preallocation */
3744   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3745   if (!newisdense) {
3746     PetscInt *new_local_nnz=0;
3747 
3748     ptr_vals = recv_buffer_vals;
3749     ptr_idxs = recv_buffer_idxs_local;
3750     if (n_recvs) {
3751       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3752     }
3753     for (i=0;i<n_recvs;i++) {
3754       PetscInt j;
3755       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3756         for (j=0;j<*(ptr_idxs+1);j++) {
3757           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3758         }
3759       } else {
3760         /* TODO */
3761       }
3762       ptr_idxs += olengths_idxs[i];
3763     }
3764     if (new_local_nnz) {
3765       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3766       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3767       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3768       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3769       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3770       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3771     } else {
3772       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3773     }
3774     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3775   } else {
3776     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3777   }
3778 
3779   /* set values */
3780   ptr_vals = recv_buffer_vals;
3781   ptr_idxs = recv_buffer_idxs_local;
3782   for (i=0;i<n_recvs;i++) {
3783     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3784       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3785       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3786       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3787       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3788       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3789     } else {
3790       /* TODO */
3791     }
3792     ptr_idxs += olengths_idxs[i];
3793     ptr_vals += olengths_vals[i];
3794   }
3795   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3796   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3797   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3798   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3799   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3800   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3801 
3802 #if 0
3803   if (!restrict_comm) { /* check */
3804     Vec       lvec,rvec;
3805     PetscReal infty_error;
3806 
3807     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3808     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3809     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3810     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3811     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3812     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3813     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3814     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3815     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3816   }
3817 #endif
3818 
3819   /* assemble new additional is (if any) */
3820   if (nis) {
3821     PetscInt **temp_idxs,*count_is,j,psum;
3822 
3823     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3824     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3825     ptr_idxs = recv_buffer_idxs_is;
3826     psum = 0;
3827     for (i=0;i<n_recvs;i++) {
3828       for (j=0;j<nis;j++) {
3829         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3830         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3831         psum += plen;
3832         ptr_idxs += plen+1; /* shift pointer to received data */
3833       }
3834     }
3835     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3836     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3837     for (i=1;i<nis;i++) {
3838       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3839     }
3840     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3841     ptr_idxs = recv_buffer_idxs_is;
3842     for (i=0;i<n_recvs;i++) {
3843       for (j=0;j<nis;j++) {
3844         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3845         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3846         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3847         ptr_idxs += plen+1; /* shift pointer to received data */
3848       }
3849     }
3850     for (i=0;i<nis;i++) {
3851       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3852       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3853       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3854     }
3855     ierr = PetscFree(count_is);CHKERRQ(ierr);
3856     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3857     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3858   }
3859   /* free workspace */
3860   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3861   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3862   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3863   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3864   if (isdense) {
3865     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3866     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3867   } else {
3868     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3869   }
3870   if (nis) {
3871     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3872     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3873   }
3874   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3875   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3876   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3877   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3878   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3879   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3880   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3881   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3882   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3883   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3884   ierr = PetscFree(onodes);CHKERRQ(ierr);
3885   if (nis) {
3886     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3887     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3888     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3889   }
3890   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3891   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3892     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3893     for (i=0;i<nis;i++) {
3894       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3895     }
3896   }
3897   PetscFunctionReturn(0);
3898 }
3899 
3900 /* temporary hack into ksp private data structure */
3901 #include <petsc/private/kspimpl.h>
3902 
3903 #undef __FUNCT__
3904 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3905 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3906 {
3907   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3908   PC_IS                  *pcis = (PC_IS*)pc->data;
3909   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3910   MatNullSpace           CoarseNullSpace=NULL;
3911   ISLocalToGlobalMapping coarse_islg;
3912   IS                     coarse_is,*isarray;
3913   PetscInt               i,im_active=-1,active_procs=-1;
3914   PetscInt               nis,nisdofs,nisneu;
3915   PC                     pc_temp;
3916   PCType                 coarse_pc_type;
3917   KSPType                coarse_ksp_type;
3918   PetscBool              multilevel_requested,multilevel_allowed;
3919   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3920   Mat                    t_coarse_mat_is;
3921   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3922   PetscMPIInt            all_procs;
3923   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3924   PetscBool              compute_vecs = PETSC_FALSE;
3925   PetscScalar            *array;
3926   PetscErrorCode         ierr;
3927 
3928   PetscFunctionBegin;
3929   /* Assign global numbering to coarse dofs */
3930   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3931     PetscInt ocoarse_size;
3932     compute_vecs = PETSC_TRUE;
3933     ocoarse_size = pcbddc->coarse_size;
3934     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3935     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3936     /* see if we can avoid some work */
3937     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3938       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3939         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3940         coarse_reuse = PETSC_FALSE;
3941       } else { /* we can safely reuse already computed coarse matrix */
3942         coarse_reuse = PETSC_TRUE;
3943       }
3944     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3945       coarse_reuse = PETSC_FALSE;
3946     }
3947     /* reset any subassembling information */
3948     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3949     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3950   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3951     coarse_reuse = PETSC_TRUE;
3952   }
3953 
3954   /* count "active" (i.e. with positive local size) and "void" processes */
3955   im_active = !!(pcis->n);
3956   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3957   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3958   void_procs = all_procs-active_procs;
3959   csin_type_simple = PETSC_TRUE;
3960   redist = PETSC_FALSE;
3961   if (pcbddc->current_level && void_procs) {
3962     csin_ml = PETSC_TRUE;
3963     ncoarse_ml = void_procs;
3964     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3965     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3966       csin_ds = PETSC_TRUE;
3967       ncoarse_ds = pcbddc->redistribute_coarse;
3968       redist = PETSC_TRUE;
3969     } else {
3970       csin_ds = PETSC_TRUE;
3971       ncoarse_ds = active_procs;
3972       redist = PETSC_TRUE;
3973     }
3974   } else {
3975     csin_ml = PETSC_FALSE;
3976     ncoarse_ml = all_procs;
3977     if (void_procs) {
3978       csin_ds = PETSC_TRUE;
3979       ncoarse_ds = void_procs;
3980       csin_type_simple = PETSC_FALSE;
3981     } else {
3982       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3983         csin_ds = PETSC_TRUE;
3984         ncoarse_ds = pcbddc->redistribute_coarse;
3985         redist = PETSC_TRUE;
3986       } else {
3987         csin_ds = PETSC_FALSE;
3988         ncoarse_ds = all_procs;
3989       }
3990     }
3991   }
3992 
3993   /*
3994     test if we can go multilevel: three conditions must be satisfied:
3995     - we have not exceeded the number of levels requested
3996     - we can actually subassemble the active processes
3997     - we can find a suitable number of MPI processes where we can place the subassembled problem
3998   */
3999   multilevel_allowed = PETSC_FALSE;
4000   multilevel_requested = PETSC_FALSE;
4001   if (pcbddc->current_level < pcbddc->max_levels) {
4002     multilevel_requested = PETSC_TRUE;
4003     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4004       multilevel_allowed = PETSC_FALSE;
4005     } else {
4006       multilevel_allowed = PETSC_TRUE;
4007     }
4008   }
4009   /* determine number of process partecipating to coarse solver */
4010   if (multilevel_allowed) {
4011     ncoarse = ncoarse_ml;
4012     csin = csin_ml;
4013     redist = PETSC_FALSE;
4014   } else {
4015     ncoarse = ncoarse_ds;
4016     csin = csin_ds;
4017   }
4018 
4019   /* creates temporary l2gmap and IS for coarse indexes */
4020   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4021   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4022 
4023   /* creates temporary MATIS object for coarse matrix */
4024   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4025   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4026   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4027   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4028 #if 0
4029   {
4030     PetscViewer viewer;
4031     char filename[256];
4032     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4033     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4034     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4035     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4036     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4037   }
4038 #endif
4039   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4040   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4041   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4042   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4043   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4044 
4045   /* compute dofs splitting and neumann boundaries for coarse dofs */
4046   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4047     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4048     const PetscInt         *idxs;
4049     ISLocalToGlobalMapping tmap;
4050 
4051     /* create map between primal indices (in local representative ordering) and local primal numbering */
4052     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4053     /* allocate space for temporary storage */
4054     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4055     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4056     /* allocate for IS array */
4057     nisdofs = pcbddc->n_ISForDofsLocal;
4058     nisneu = !!pcbddc->NeumannBoundariesLocal;
4059     nis = nisdofs + nisneu;
4060     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4061     /* dofs splitting */
4062     for (i=0;i<nisdofs;i++) {
4063       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4064       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4065       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4066       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4067       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4068       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4069       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4070       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4071     }
4072     /* neumann boundaries */
4073     if (pcbddc->NeumannBoundariesLocal) {
4074       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4075       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4076       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4077       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4078       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4079       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4080       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4081       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4082     }
4083     /* free memory */
4084     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4085     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4086     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4087   } else {
4088     nis = 0;
4089     nisdofs = 0;
4090     nisneu = 0;
4091     isarray = NULL;
4092   }
4093   /* destroy no longer needed map */
4094   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4095 
4096   /* restrict on coarse candidates (if needed) */
4097   coarse_mat_is = NULL;
4098   if (csin) {
4099     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4100       if (redist) {
4101         PetscMPIInt rank;
4102         PetscInt    spc,n_spc_p1,dest[1],destsize;
4103 
4104         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4105         spc = active_procs/ncoarse;
4106         n_spc_p1 = active_procs%ncoarse;
4107         if (im_active) {
4108           destsize = 1;
4109           if (rank > n_spc_p1*(spc+1)-1) {
4110             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4111           } else {
4112             dest[0] = rank/(spc+1);
4113           }
4114         } else {
4115           destsize = 0;
4116         }
4117         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4118       } else if (csin_type_simple) {
4119         PetscMPIInt rank;
4120         PetscInt    issize,isidx;
4121 
4122         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4123         if (im_active) {
4124           issize = 1;
4125           isidx = (PetscInt)rank;
4126         } else {
4127           issize = 0;
4128           isidx = -1;
4129         }
4130         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4131       } else { /* get a suitable subassembling pattern from MATIS code */
4132         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4133       }
4134 
4135       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4136       if (!redist || ncoarse <= void_procs) {
4137         PetscInt ncoarse_cand,tissize,*nisindices;
4138         PetscInt *coarse_candidates;
4139         const PetscInt* tisindices;
4140 
4141         /* get coarse candidates' ranks in pc communicator */
4142         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4143         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4144         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4145           if (!coarse_candidates[i]) {
4146             coarse_candidates[ncoarse_cand++]=i;
4147           }
4148         }
4149         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4150 
4151 
4152         if (pcbddc->dbg_flag) {
4153           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4154           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4155           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4156           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4157           for (i=0;i<ncoarse_cand;i++) {
4158             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4159           }
4160           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4161           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4162         }
4163         /* shift the pattern on coarse candidates */
4164         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4165         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4166         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4167         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4168         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4169         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4170         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4171       }
4172       if (pcbddc->dbg_flag) {
4173         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4174         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4175         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4176         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4177       }
4178     }
4179     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4180     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4181   } else {
4182     if (pcbddc->dbg_flag) {
4183       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4184       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4185       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4186     }
4187     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4188     coarse_mat_is = t_coarse_mat_is;
4189   }
4190 
4191   /* create local to global scatters for coarse problem */
4192   if (compute_vecs) {
4193     PetscInt lrows;
4194     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4195     if (coarse_mat_is) {
4196       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4197     } else {
4198       lrows = 0;
4199     }
4200     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4201     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4202     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4203     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4204     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4205   }
4206   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4207   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4208 
4209   /* set defaults for coarse KSP and PC */
4210   if (multilevel_allowed) {
4211     coarse_ksp_type = KSPRICHARDSON;
4212     coarse_pc_type = PCBDDC;
4213   } else {
4214     coarse_ksp_type = KSPPREONLY;
4215     coarse_pc_type = PCREDUNDANT;
4216   }
4217 
4218   /* print some info if requested */
4219   if (pcbddc->dbg_flag) {
4220     if (!multilevel_allowed) {
4221       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4222       if (multilevel_requested) {
4223         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4224       } else if (pcbddc->max_levels) {
4225         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4226       }
4227       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4228     }
4229   }
4230 
4231   /* create the coarse KSP object only once with defaults */
4232   if (coarse_mat_is) {
4233     MatReuse coarse_mat_reuse;
4234     PetscViewer dbg_viewer = NULL;
4235     if (pcbddc->dbg_flag) {
4236       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4237       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4238     }
4239     if (!pcbddc->coarse_ksp) {
4240       char prefix[256],str_level[16];
4241       size_t len;
4242       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4243       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4244       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4245       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4246       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4247       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4248       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4249       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4250       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4251       /* prefix */
4252       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4253       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4254       if (!pcbddc->current_level) {
4255         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4256         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4257       } else {
4258         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4259         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4260         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4261         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4262         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4263         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4264       }
4265       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4266       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4267       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4268       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4269       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4270       /* allow user customization */
4271       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4272     }
4273     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4274     if (nisdofs) {
4275       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4276       for (i=0;i<nisdofs;i++) {
4277         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4278       }
4279     }
4280     if (nisneu) {
4281       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4282       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4283     }
4284 
4285     /* get some info after set from options */
4286     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4287     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4288     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4289     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4290     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4291       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4292       isbddc = PETSC_FALSE;
4293     }
4294     if (isredundant) {
4295       KSP inner_ksp;
4296       PC inner_pc;
4297       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4298       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4299       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4300     }
4301 
4302     /* assemble coarse matrix */
4303     if (coarse_reuse) {
4304       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4305       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4306       coarse_mat_reuse = MAT_REUSE_MATRIX;
4307     } else {
4308       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4309     }
4310     if (isbddc || isnn) {
4311       if (pcbddc->coarsening_ratio > 1) {
4312         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4313           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4314           if (pcbddc->dbg_flag) {
4315             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4316             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4317             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4318             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4319           }
4320         }
4321         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4322       } else {
4323         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4324         coarse_mat = coarse_mat_is;
4325       }
4326     } else {
4327       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4328     }
4329     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4330 
4331     /* propagate symmetry info to coarse matrix */
4332     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4333     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4334 
4335     /* set operators */
4336     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4337     if (pcbddc->dbg_flag) {
4338       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4339     }
4340   } else { /* processes non partecipating to coarse solver (if any) */
4341     coarse_mat = 0;
4342   }
4343   ierr = PetscFree(isarray);CHKERRQ(ierr);
4344 #if 0
4345   {
4346     PetscViewer viewer;
4347     char filename[256];
4348     sprintf(filename,"coarse_mat.m");
4349     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4350     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4351     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4352     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4353   }
4354 #endif
4355 
4356   /* Compute coarse null space (special handling by BDDC only) */
4357   if (pcbddc->NullSpace) {
4358     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4359   }
4360 
4361   if (pcbddc->coarse_ksp) {
4362     Vec crhs,csol;
4363     PetscBool ispreonly;
4364     if (CoarseNullSpace) {
4365       if (isbddc) {
4366         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4367       } else {
4368         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4369       }
4370     }
4371     /* setup coarse ksp */
4372     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4373     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4374     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4375     /* hack */
4376     if (!csol) {
4377       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4378     }
4379     if (!crhs) {
4380       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4381     }
4382     /* Check coarse problem if in debug mode or if solving with an iterative method */
4383     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4384     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4385       KSP       check_ksp;
4386       KSPType   check_ksp_type;
4387       PC        check_pc;
4388       Vec       check_vec,coarse_vec;
4389       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4390       PetscInt  its;
4391       PetscBool compute_eigs;
4392       PetscReal *eigs_r,*eigs_c;
4393       PetscInt  neigs;
4394       const char *prefix;
4395 
4396       /* Create ksp object suitable for estimation of extreme eigenvalues */
4397       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4398       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4399       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4400       if (ispreonly) {
4401         check_ksp_type = KSPPREONLY;
4402         compute_eigs = PETSC_FALSE;
4403       } else {
4404         check_ksp_type = KSPGMRES;
4405         compute_eigs = PETSC_TRUE;
4406       }
4407       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4408       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4409       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4410       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4411       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4412       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4413       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4414       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4415       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4416       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4417       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4418       /* create random vec */
4419       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4420       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4421       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4422       if (CoarseNullSpace) {
4423         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4424       }
4425       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4426       /* solve coarse problem */
4427       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4428       if (CoarseNullSpace) {
4429         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4430       }
4431       /* set eigenvalue estimation if preonly has not been requested */
4432       if (compute_eigs) {
4433         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4434         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4435         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4436         lambda_max = eigs_r[neigs-1];
4437         lambda_min = eigs_r[0];
4438         if (pcbddc->use_coarse_estimates) {
4439           if (lambda_max>lambda_min) {
4440             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4441             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4442           }
4443         }
4444       }
4445 
4446       /* check coarse problem residual error */
4447       if (pcbddc->dbg_flag) {
4448         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4449         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4450         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4451         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4452         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4453         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4454         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4455         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4456         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4457         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4458         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4459         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4460         if (compute_eigs) {
4461           PetscReal lambda_max_s,lambda_min_s;
4462           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4463           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4464           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4465           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4466           for (i=0;i<neigs;i++) {
4467             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4468           }
4469         }
4470         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4471         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4472       }
4473       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4474       if (compute_eigs) {
4475         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4476         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4477       }
4478     }
4479   }
4480   /* print additional info */
4481   if (pcbddc->dbg_flag) {
4482     /* waits until all processes reaches this point */
4483     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4484     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4485     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4486   }
4487 
4488   /* free memory */
4489   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4490   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4491   PetscFunctionReturn(0);
4492 }
4493 
4494 #undef __FUNCT__
4495 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4496 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4497 {
4498   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4499   PC_IS*         pcis = (PC_IS*)pc->data;
4500   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4501   PetscInt       i,local_size,coarse_size=0;
4502   PetscInt       *local_primal_indices=NULL;
4503   PetscInt       *t_local_primal_indices=NULL;
4504   PetscErrorCode ierr;
4505 
4506   PetscFunctionBegin;
4507   /* Compute global number of coarse dofs */
4508   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4509     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4510   }
4511   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr);
4512   ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr);
4513   local_size = 0;
4514   for (i=0;i<pcbddc->local_primal_size_cc;i++) {
4515     PetscInt j;
4516     for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j;
4517   }
4518   ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr);
4519   if (local_size != pcbddc->local_primal_size) {
4520     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4521   }
4522 
4523   /* check numbering */
4524   if (pcbddc->dbg_flag) {
4525     PetscScalar coarsesum,*array;
4526     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4527 
4528     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4529     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4530     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4531     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4532     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4533     for (i=0;i<pcbddc->local_primal_size;i++) {
4534       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4535     }
4536     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4537     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4538     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4539     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4540     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4541     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4542     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4543     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4544     for (i=0;i<pcis->n;i++) {
4545       if (array[i] == 1.0) {
4546         set_error = PETSC_TRUE;
4547         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4548       }
4549     }
4550     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4551     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4552     for (i=0;i<pcis->n;i++) {
4553       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4554     }
4555     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4556     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4557     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4558     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4559     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4560     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4561     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4562       PetscInt *gidxs;
4563 
4564       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4565       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4566       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4567       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4568       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4569       for (i=0;i<pcbddc->local_primal_size;i++) {
4570         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4571       }
4572       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4573       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4574     }
4575     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4576     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4577   }
4578   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4579   /* get back data */
4580   *coarse_size_n = coarse_size;
4581   *local_primal_indices_n = local_primal_indices;
4582   PetscFunctionReturn(0);
4583 }
4584 
4585 #undef __FUNCT__
4586 #define __FUNCT__ "PCBDDCGlobalToLocal"
4587 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4588 {
4589   IS             localis_t;
4590   PetscInt       i,lsize,*idxs,n;
4591   PetscScalar    *vals;
4592   PetscErrorCode ierr;
4593 
4594   PetscFunctionBegin;
4595   /* get indices in local ordering exploiting local to global map */
4596   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4597   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4598   for (i=0;i<lsize;i++) vals[i] = 1.0;
4599   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4600   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4601   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4602   if (idxs) { /* multilevel guard */
4603     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4604   }
4605   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4606   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4607   ierr = PetscFree(vals);CHKERRQ(ierr);
4608   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4609   /* now compute set in local ordering */
4610   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4611   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4612   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4613   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4614   for (i=0,lsize=0;i<n;i++) {
4615     if (PetscRealPart(vals[i]) > 0.5) {
4616       lsize++;
4617     }
4618   }
4619   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4620   for (i=0,lsize=0;i<n;i++) {
4621     if (PetscRealPart(vals[i]) > 0.5) {
4622       idxs[lsize++] = i;
4623     }
4624   }
4625   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4626   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4627   *localis = localis_t;
4628   PetscFunctionReturn(0);
4629 }
4630 
4631 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4632 #undef __FUNCT__
4633 #define __FUNCT__ "PCBDDCMatMult_Private"
4634 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4635 {
4636   PCBDDCChange_ctx change_ctx;
4637   PetscErrorCode   ierr;
4638 
4639   PetscFunctionBegin;
4640   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4641   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4642   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4643   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4644   PetscFunctionReturn(0);
4645 }
4646 
4647 #undef __FUNCT__
4648 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4649 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4650 {
4651   PCBDDCChange_ctx change_ctx;
4652   PetscErrorCode   ierr;
4653 
4654   PetscFunctionBegin;
4655   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4656   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4657   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4658   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4659   PetscFunctionReturn(0);
4660 }
4661 
4662 #undef __FUNCT__
4663 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4664 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4665 {
4666   PC_IS               *pcis=(PC_IS*)pc->data;
4667   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4668   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4669   Mat                 S_j;
4670   PetscInt            *used_xadj,*used_adjncy;
4671   PetscBool           free_used_adj;
4672   PetscErrorCode      ierr;
4673 
4674   PetscFunctionBegin;
4675   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4676   free_used_adj = PETSC_FALSE;
4677   if (pcbddc->sub_schurs_layers == -1) {
4678     used_xadj = NULL;
4679     used_adjncy = NULL;
4680   } else {
4681     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4682       used_xadj = pcbddc->mat_graph->xadj;
4683       used_adjncy = pcbddc->mat_graph->adjncy;
4684     } else if (pcbddc->computed_rowadj) {
4685       used_xadj = pcbddc->mat_graph->xadj;
4686       used_adjncy = pcbddc->mat_graph->adjncy;
4687     } else {
4688       PetscBool      flg_row=PETSC_FALSE;
4689       const PetscInt *xadj,*adjncy;
4690       PetscInt       nvtxs;
4691 
4692       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4693       if (flg_row) {
4694         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4695         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4696         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4697         free_used_adj = PETSC_TRUE;
4698       } else {
4699         pcbddc->sub_schurs_layers = -1;
4700         used_xadj = NULL;
4701         used_adjncy = NULL;
4702       }
4703       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4704     }
4705   }
4706 
4707   /* setup sub_schurs data */
4708   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4709   if (!sub_schurs->use_mumps) {
4710     /* pcbddc->ksp_D up to date only if not using MUMPS */
4711     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4712     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4713   } else {
4714     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4715     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4716   }
4717   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4718 
4719   /* free adjacency */
4720   if (free_used_adj) {
4721     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4722   }
4723   PetscFunctionReturn(0);
4724 }
4725 
4726 #undef __FUNCT__
4727 #define __FUNCT__ "PCBDDCInitSubSchurs"
4728 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4729 {
4730   PC_IS               *pcis=(PC_IS*)pc->data;
4731   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4732   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4733   PCBDDCGraph         graph;
4734   PetscErrorCode      ierr;
4735 
4736   PetscFunctionBegin;
4737   /* attach interface graph for determining subsets */
4738   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4739     IS verticesIS;
4740 
4741     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4742     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4743     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4744     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4745     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4746     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4747 /*
4748     if (pcbddc->dbg_flag) {
4749       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4750     }
4751 */
4752   } else {
4753     graph = pcbddc->mat_graph;
4754   }
4755 
4756   /* sub_schurs init */
4757   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4758 
4759   /* free graph struct */
4760   if (pcbddc->sub_schurs_rebuild) {
4761     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4762   }
4763   PetscFunctionReturn(0);
4764 }
4765