xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision d62866d3572c18c38c1338fc8db51aa3e0f76869)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size;
76       cum2 += subset_size*n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
112     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125                       nv+cum,&pcbddc->adaptive_constraints_idxs,
126                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
127   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129   maxneigs = 0;
130   cum = cum2 = cumarray = 0;
131   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132   pcbddc->adaptive_constraints_data_ptr[0] = 0;
133   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
134     const PetscInt *idxs;
135 
136     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
137     for (cum=0;cum<nv;cum++) {
138       pcbddc->adaptive_constraints_n[cum] = 1;
139       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140       pcbddc->adaptive_constraints_data[cum] = 1.0;
141       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143     }
144     cum2 = cum;
145     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
146   }
147 
148   if (mss) { /* multilevel */
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
150     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
151   }
152 
153   for (i=0;i<sub_schurs->n_subs;i++) {
154     PetscInt j,subset_size;
155 
156     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
157     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
158       const PetscInt *idxs;
159       PetscReal      infty = PETSC_MAX_REAL;
160       PetscInt       eigs_start = 0;
161       PetscBLASInt   B_N;
162 
163       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
164       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
165         if (sub_schurs->is_hermitian) {
166           PetscInt j;
167           for (j=0;j<subset_size;j++) {
168             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
169           }
170           for (j=0;j<subset_size;j++) {
171             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
172           }
173         } else {
174           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
175           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
176         }
177       } else {
178         S = Sarray + cumarray;
179         St = Starray + cumarray;
180       }
181 
182       /* Threshold: this is an heuristic for edges */
183       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
184       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
185 
186       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
187         PetscBLASInt B_itype = 1;
188         PetscBLASInt B_IL, B_IU;
189         PetscReal    eps = -1.0; /* dlamch? */
190         PetscInt     nmin_s;
191 
192         /* ask for eigenvalues larger than thresh */
193         if (pcbddc->dbg_flag) {
194           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
195         }
196         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
197 #if defined(PETSC_USE_COMPLEX)
198         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
199 #else
200         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
201 #endif
202         ierr = PetscFPTrapPop();CHKERRQ(ierr);
203         if (B_ierr) {
204           if (B_ierr < 0 ) {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
206           } else if (B_ierr <= B_N) {
207             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
208           } else {
209             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
210           }
211         }
212 
213         if (B_neigs > nmax) {
214           if (pcbddc->dbg_flag) {
215             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
216           }
217           eigs_start = B_neigs -nmax;
218           B_neigs = nmax;
219         }
220 
221         nmin_s = PetscMin(nmin,B_N);
222         if (B_neigs < nmin_s) {
223           PetscBLASInt B_neigs2;
224 
225           B_IU = B_N - B_neigs;
226           B_IL = B_N - nmin_s + 1;
227           if (pcbddc->dbg_flag) {
228             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
229           }
230           if (sub_schurs->is_hermitian) {
231             PetscInt j;
232             for (j=0;j<subset_size;j++) {
233               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
234             }
235             for (j=0;j<subset_size;j++) {
236               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
237             }
238           } else {
239             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
240             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
241           }
242           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
243 #if defined(PETSC_USE_COMPLEX)
244           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
245 #else
246           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
247 #endif
248           ierr = PetscFPTrapPop();CHKERRQ(ierr);
249           B_neigs += B_neigs2;
250         }
251         if (B_ierr) {
252           if (B_ierr < 0 ) {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
254           } else if (B_ierr <= B_N) {
255             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
256           } else {
257             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
258           }
259         }
260         if (pcbddc->dbg_flag) {
261           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
262           for (j=0;j<B_neigs;j++) {
263             if (eigs[j] == 0.0) {
264               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
265             } else {
266               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
267             }
268           }
269         }
270       } else {
271           /* TODO */
272       }
273 
274       maxneigs = PetscMax(B_neigs,maxneigs);
275       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
276       if (B_neigs) {
277         ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
278 
279         if (pcbddc->dbg_flag > 1) {
280           PetscInt ii;
281           for (ii=0;ii<B_neigs;ii++) {
282             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
283             for (j=0;j<B_N;j++) {
284               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
285             }
286           }
287         }
288 #if 0
289         for (j=0;j<B_neigs;j++) {
290           PetscBLASInt Blas_N,Blas_one = 1.0;
291           PetscScalar norm;
292           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
293           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
294                                                      &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
295           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
296             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
297           } else {
298             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
299           }
300           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
301         }
302 #endif
303         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
304         pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
305         pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
306         cum++;
307       }
308       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
309     }
310     /* shift for next computation */
311     cumarray += subset_size*subset_size;
312   }
313   if (pcbddc->dbg_flag) {
314     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
315   }
316 
317   if (mss) {
318     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
319     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
320     /* destroy matrices (junk) */
321     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
322     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
323   }
324   if (allocated_S_St) {
325     ierr = PetscFree2(S,St);CHKERRQ(ierr);
326   }
327   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
328 #if defined(PETSC_USE_COMPLEX)
329   ierr = PetscFree(rwork);CHKERRQ(ierr);
330 #endif
331   if (pcbddc->dbg_flag) {
332     PetscInt maxneigs_r;
333     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
334     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
335   }
336   PetscFunctionReturn(0);
337 }
338 
339 #undef __FUNCT__
340 #define __FUNCT__ "PCBDDCSetUpSolvers"
341 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
342 {
343   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
344   PetscScalar    *coarse_submat_vals;
345   PetscErrorCode ierr;
346 
347   PetscFunctionBegin;
348   /* Setup local scatters R_to_B and (optionally) R_to_D */
349   /* PCBDDCSetUpLocalWorkVectors should be called first! */
350   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
351 
352   /* Setup local neumann solver ksp_R */
353   /* PCBDDCSetUpLocalScatters should be called first! */
354   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
355 
356   /* Change global null space passed in by the user if change of basis has been requested */
357   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
358     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
359   }
360 
361   /*
362      Setup local correction and local part of coarse basis.
363      Gives back the dense local part of the coarse matrix in column major ordering
364   */
365   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
366 
367   /* Compute total number of coarse nodes and setup coarse solver */
368   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
369 
370   /* free */
371   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "PCBDDCResetCustomization"
377 PetscErrorCode PCBDDCResetCustomization(PC pc)
378 {
379   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
384   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
385   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
387   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
388   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
389   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
390   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
391   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
392   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
393   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 #undef __FUNCT__
398 #define __FUNCT__ "PCBDDCResetTopography"
399 PetscErrorCode PCBDDCResetTopography(PC pc)
400 {
401   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
402   PetscErrorCode ierr;
403 
404   PetscFunctionBegin;
405   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
406   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
407   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
408   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
409   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "PCBDDCResetSolvers"
415 PetscErrorCode PCBDDCResetSolvers(PC pc)
416 {
417   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
418   PetscScalar    *array;
419   PetscErrorCode ierr;
420 
421   PetscFunctionBegin;
422   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
423   if (pcbddc->coarse_phi_B) {
424     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
425     ierr = PetscFree(array);CHKERRQ(ierr);
426   }
427   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
428   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
429   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
430   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
431   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
432   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
434   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
436   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
437   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
440   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
443   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
444   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
446   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
447   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
448   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
449   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
455 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
456 {
457   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
458   PC_IS          *pcis = (PC_IS*)pc->data;
459   VecType        impVecType;
460   PetscInt       n_constraints,n_R,old_size;
461   PetscErrorCode ierr;
462 
463   PetscFunctionBegin;
464   if (!pcbddc->ConstraintMatrix) {
465     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
466   }
467   /* get sizes */
468   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
469   n_R = pcis->n-pcbddc->n_vertices;
470   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
471   /* local work vectors (try to avoid unneeded work)*/
472   /* R nodes */
473   old_size = -1;
474   if (pcbddc->vec1_R) {
475     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
476   }
477   if (n_R != old_size) {
478     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
480     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
481     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
482     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
483     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
484   }
485   /* local primal dofs */
486   old_size = -1;
487   if (pcbddc->vec1_P) {
488     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
489   }
490   if (pcbddc->local_primal_size != old_size) {
491     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
492     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
493     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
494     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
495   }
496   /* local explicit constraints */
497   old_size = -1;
498   if (pcbddc->vec1_C) {
499     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
500   }
501   if (n_constraints && n_constraints != old_size) {
502     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
503     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
504     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
505     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
506   }
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "PCBDDCSetUpCorrection"
512 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
513 {
514   PetscErrorCode  ierr;
515   /* pointers to pcis and pcbddc */
516   PC_IS*          pcis = (PC_IS*)pc->data;
517   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
518   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
519   /* submatrices of local problem */
520   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
521   /* submatrices of local coarse problem */
522   Mat             S_VV,S_CV,S_VC,S_CC;
523   /* working matrices */
524   Mat             C_CR;
525   /* additional working stuff */
526   PC              pc_R;
527   Mat             F;
528   PetscBool       isLU,isCHOL,isILU;
529 
530   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
531   PetscScalar     *work;
532   PetscInt        *idx_V_B;
533   PetscInt        n,n_vertices,n_constraints;
534   PetscInt        i,n_R,n_D,n_B;
535   PetscBool       unsymmetric_check;
536   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
537   MatType         impMatType;
538   /* some shortcuts to scalars */
539   PetscScalar     one=1.0,m_one=-1.0;
540 
541   PetscFunctionBegin;
542   n_vertices = pcbddc->n_vertices;
543   n_constraints = pcbddc->local_primal_size-n_vertices;
544   /* Set Non-overlapping dimensions */
545   n_B = pcis->n_B;
546   n_D = pcis->n - n_B;
547   n_R = pcis->n - n_vertices;
548 
549   /* Set types for local objects needed by BDDC precondtioner */
550   impMatType = MATSEQDENSE;
551 
552   /* vertices in boundary numbering */
553   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
554   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
555   if (i != n_vertices) {
556     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
557   }
558 
559   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
560   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
567   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
568   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
569 
570   unsymmetric_check = PETSC_FALSE;
571   /* allocate workspace */
572   n = 0;
573   if (n_constraints) {
574     n += n_R*n_constraints;
575   }
576   if (n_vertices) {
577     n = PetscMax(2*n_R*n_vertices,n);
578     n = PetscMax((n_R+n_B)*n_vertices,n);
579   }
580   if (!pcbddc->issym) {
581     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
582     unsymmetric_check = PETSC_TRUE;
583   }
584   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
585 
586   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
587   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
588   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
589   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
590   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
591   if (isLU || isILU || isCHOL) {
592     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
593   } else if (sub_schurs->reuse_mumps) {
594     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
595     MatFactorType type;
596 
597     F = NULL;
598     ierr = MatGetFactorType(reuse_mumps->F,&type);CHKERRQ(ierr);
599     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
600   } else {
601     F = NULL;
602   }
603 
604   /* Precompute stuffs needed for preprocessing and application of BDDC*/
605   if (n_constraints) {
606     Mat         M1,M2,M3;
607     Mat         auxmat;
608     IS          is_aux;
609     PetscScalar *array,*array2;
610 
611     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
612     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
613 
614     /* Extract constraints on R nodes: C_{CR}  */
615     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
616     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
617     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
618 
619     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
620     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
621     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
622     for (i=0;i<n_constraints;i++) {
623       const PetscScalar *row_cmat_values;
624       const PetscInt    *row_cmat_indices;
625       PetscInt          size_of_constraint,j;
626 
627       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
628       for (j=0;j<size_of_constraint;j++) {
629         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
630       }
631       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
632     }
633     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
634     if (F) {
635       Mat B;
636 
637       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
638       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
639       ierr = MatDestroy(&B);CHKERRQ(ierr);
640     } else {
641       PetscScalar *marr;
642 
643       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
644       for (i=0;i<n_constraints;i++) {
645         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
646         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
647         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
648         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
649         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
650       }
651       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
652     }
653     if (!pcbddc->switch_static) {
654       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
655       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
656       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
657       for (i=0;i<n_constraints;i++) {
658         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
659         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
660         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
661         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
662         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
663         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
664       }
665       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
666       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
667       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
668     } else {
669       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
670       pcbddc->local_auxmat2 = local_auxmat2_R;
671       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
672     }
673     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
674     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
675     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
676     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
677     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
678     if (isCHOL) {
679       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
680     } else {
681       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
682     }
683     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
684     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
685     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
686     ierr = MatDestroy(&M2);CHKERRQ(ierr);
687     ierr = MatDestroy(&M3);CHKERRQ(ierr);
688     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
689     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
690     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
691     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
692     ierr = MatDestroy(&M1);CHKERRQ(ierr);
693   }
694   /* Get submatrices from subdomain matrix */
695   if (n_vertices) {
696     Mat       newmat;
697     IS        is_aux;
698     PetscInt  ibs,mbs;
699     PetscBool issbaij;
700 
701     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
702     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
703     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
704     if (ibs != mbs) { /* need to convert to SEQAIJ */
705       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
706       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
707       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
708       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
709       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
710       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
711     } else {
712       /* this is safe */
713       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
714       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
715       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
716         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
717         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
718         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
719         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
720         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
721         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
722       } else {
723         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
724         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
725         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
726       }
727       /* need to convert to SeqAIJ for MatMatMult with dense matrix */
728       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
729     }
730     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
731   }
732 
733   /* Matrix of coarse basis functions (local) */
734   if (pcbddc->coarse_phi_B) {
735     PetscInt on_B,on_primal,on_D=n_D;
736     if (pcbddc->coarse_phi_D) {
737       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
738     }
739     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
740     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
741       PetscScalar *marray;
742 
743       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
744       ierr = PetscFree(marray);CHKERRQ(ierr);
745       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
746       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
747       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
748       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
749     }
750   }
751 
752   if (!pcbddc->coarse_phi_B) {
753     PetscScalar *marray;
754 
755     n = n_B*pcbddc->local_primal_size;
756     if (pcbddc->switch_static || pcbddc->dbg_flag) {
757       n += n_D*pcbddc->local_primal_size;
758     }
759     if (!pcbddc->issym) {
760       n *= 2;
761     }
762     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
763     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
764     n = n_B*pcbddc->local_primal_size;
765     if (pcbddc->switch_static || pcbddc->dbg_flag) {
766       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
767       n += n_D*pcbddc->local_primal_size;
768     }
769     if (!pcbddc->issym) {
770       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
771       if (pcbddc->switch_static || pcbddc->dbg_flag) {
772         n = n_B*pcbddc->local_primal_size;
773         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
774       }
775     } else {
776       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
777       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
778       if (pcbddc->switch_static || pcbddc->dbg_flag) {
779         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
780         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
781       }
782     }
783   }
784   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
785   /* vertices */
786   if (n_vertices) {
787 
788     if (n_R) {
789       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
790       PetscBLASInt B_N,B_one = 1;
791       PetscScalar  *x,*y;
792 
793       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
794       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
795       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
796       if (F) {
797         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
798       } else {
799         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
800         for (i=0;i<n_vertices;i++) {
801           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
802           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
803           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
804           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
805           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
806         }
807         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
808       }
809       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
810       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
811       if (n_constraints) {
812         Mat B;
813 
814         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
815         for (i=0;i<n_vertices;i++) {
816           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
817           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
818           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
819           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
820           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
821           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
822         }
823         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
824         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
825         ierr = MatDestroy(&B);CHKERRQ(ierr);
826         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
827         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
828         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
829         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
830         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
831         ierr = MatDestroy(&B);CHKERRQ(ierr);
832       }
833       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
834       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
835       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
836       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
837       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
838       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
839       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
840       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
841       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
842       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
843     } else {
844       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
845     }
846     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
847     /* coarse basis functions */
848     for (i=0;i<n_vertices;i++) {
849       PetscScalar *y;
850 
851       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
852       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
853       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
854       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
855       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
856       y[n_B*i+idx_V_B[i]] = 1.0;
857       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
858       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
859 
860       if (pcbddc->switch_static || pcbddc->dbg_flag) {
861         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
862         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
863         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
864         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
865         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
866         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
867       }
868       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
869     }
870   }
871 
872   if (n_constraints) {
873     Mat B;
874 
875     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
876     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
877     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
878     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
879     if (n_vertices) {
880       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
881         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
882       } else {
883         Mat S_VCt;
884 
885         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
886         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
887         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
888       }
889     }
890     ierr = MatDestroy(&B);CHKERRQ(ierr);
891     /* coarse basis functions */
892     for (i=0;i<n_constraints;i++) {
893       PetscScalar *y;
894 
895       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
896       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
897       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
898       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
899       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
900       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
901       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
902       if (pcbddc->switch_static || pcbddc->dbg_flag) {
903         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
904         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
905         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
906         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
907         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
908         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
909       }
910       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
911     }
912   }
913   if (n_constraints) {
914     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
915   }
916 
917   /* compute other basis functions for non-symmetric problems */
918   if (!pcbddc->issym) {
919 
920     if (n_constraints) {
921       Mat S_CCT,B_C;
922 
923       /* this is a lazy thing */
924       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
925       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
926       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
927       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
928       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
929       if (n_vertices) {
930         Mat B_V,S_VCT;
931 
932         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
933         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
934         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
935         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
936         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
937       }
938       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
939     } else { /* if there are no constraints, reset work */
940       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
941     }
942     if (n_vertices && n_R) {
943       Mat          A_VRT;
944       PetscScalar  *marray;
945       PetscBLASInt B_N,B_one = 1;
946 
947       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
948       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
949       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
950       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
951       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
952       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
953       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
954     }
955 
956     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
957       for (i=0;i<pcbddc->local_primal_size;i++) {
958         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
959         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
960         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
961         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
962         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
963       }
964     } else {
965       for (i=0;i<pcbddc->local_primal_size;i++) {
966         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
967         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
968         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
969         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
970         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
971       }
972     }
973     /* coarse basis functions */
974     for (i=0;i<pcbddc->local_primal_size;i++) {
975       PetscScalar *y;
976 
977       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
978       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
979       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
980       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
981       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
982       if (i<n_vertices) {
983         y[n_B*i+idx_V_B[i]] = 1.0;
984       }
985       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
986       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
987 
988       if (pcbddc->switch_static || pcbddc->dbg_flag) {
989         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
990         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
991         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
994         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
995       }
996       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
997     }
998   }
999   /* free memory */
1000   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1001   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1002   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1003   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1004   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1005   ierr = PetscFree(work);CHKERRQ(ierr);
1006   if (n_vertices) {
1007     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1008   }
1009   if (n_constraints) {
1010     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1011   }
1012   /* Checking coarse_sub_mat and coarse basis functios */
1013   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1014   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1015   if (pcbddc->dbg_flag) {
1016     Mat         coarse_sub_mat;
1017     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1018     Mat         coarse_phi_D,coarse_phi_B;
1019     Mat         coarse_psi_D,coarse_psi_B;
1020     Mat         A_II,A_BB,A_IB,A_BI;
1021     Mat         C_B,CPHI;
1022     IS          is_dummy;
1023     Vec         mones;
1024     MatType     checkmattype=MATSEQAIJ;
1025     PetscReal   real_value;
1026 
1027     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1028     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1029     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1030     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1031     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1032     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1033     if (unsymmetric_check) {
1034       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1035       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1036     }
1037     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1038 
1039     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1040     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
1041     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1042     if (unsymmetric_check) {
1043       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1044       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1045       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1046       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1047       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1048       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1049       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1050       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1051       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1052       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1053       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1054       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1055     } else {
1056       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1057       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1058       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1059       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1060       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1061       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1062       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1063       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1064     }
1065     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1066     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1067     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1068     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1069     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1070     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1071     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1072     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1073 
1074     /* check constraints */
1075     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1076     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1077     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1078     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1079     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1080     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1081     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1082     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1083     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1084     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1085     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1086     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1087 
1088     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1089     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1090     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1091     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1092     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1093     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1094     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1095     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1096     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1097     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1098     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1099     if (unsymmetric_check) {
1100       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1101       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1102     }
1103     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1104   }
1105   /* get back data */
1106   *coarse_submat_vals_n = coarse_submat_vals;
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1112 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1113 {
1114   Mat            *work_mat;
1115   IS             isrow_s,iscol_s;
1116   PetscBool      rsorted,csorted;
1117   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1118   PetscErrorCode ierr;
1119 
1120   PetscFunctionBegin;
1121   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1122   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1123   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1124   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1125 
1126   if (!rsorted) {
1127     const PetscInt *idxs;
1128     PetscInt *idxs_sorted,i;
1129 
1130     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1131     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1132     for (i=0;i<rsize;i++) {
1133       idxs_perm_r[i] = i;
1134     }
1135     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1136     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1137     for (i=0;i<rsize;i++) {
1138       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1139     }
1140     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1141     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1142   } else {
1143     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1144     isrow_s = isrow;
1145   }
1146 
1147   if (!csorted) {
1148     if (isrow == iscol) {
1149       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1150       iscol_s = isrow_s;
1151     } else {
1152       const PetscInt *idxs;
1153       PetscInt *idxs_sorted,i;
1154 
1155       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1156       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1157       for (i=0;i<csize;i++) {
1158         idxs_perm_c[i] = i;
1159       }
1160       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1161       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1162       for (i=0;i<csize;i++) {
1163         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1164       }
1165       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1166       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1167     }
1168   } else {
1169     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1170     iscol_s = iscol;
1171   }
1172 
1173   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1174 
1175   if (!rsorted || !csorted) {
1176     Mat      new_mat;
1177     IS       is_perm_r,is_perm_c;
1178 
1179     if (!rsorted) {
1180       PetscInt *idxs_r,i;
1181       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1182       for (i=0;i<rsize;i++) {
1183         idxs_r[idxs_perm_r[i]] = i;
1184       }
1185       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1186       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1187     } else {
1188       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1189     }
1190     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1191 
1192     if (!csorted) {
1193       if (isrow_s == iscol_s) {
1194         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1195         is_perm_c = is_perm_r;
1196       } else {
1197         PetscInt *idxs_c,i;
1198         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1199         for (i=0;i<csize;i++) {
1200           idxs_c[idxs_perm_c[i]] = i;
1201         }
1202         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1203         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1204       }
1205     } else {
1206       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1207     }
1208     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1209 
1210     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1211     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1212     work_mat[0] = new_mat;
1213     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1214     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1215   }
1216 
1217   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1218   *B = work_mat[0];
1219   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1220   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1221   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1227 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1228 {
1229   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1230   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1231   Mat            new_mat;
1232   IS             is_local,is_global;
1233   PetscInt       local_size;
1234   PetscBool      isseqaij;
1235   PetscErrorCode ierr;
1236 
1237   PetscFunctionBegin;
1238   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1239   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1240   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1241   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1242   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1243   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1244   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1245 
1246   /* check */
1247   if (pcbddc->dbg_flag) {
1248     Vec       x,x_change;
1249     PetscReal error;
1250 
1251     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1252     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1253     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1254     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1255     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1256     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1257     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1258     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1259     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1260     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1261     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1262     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1263     ierr = VecDestroy(&x);CHKERRQ(ierr);
1264     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1265   }
1266 
1267   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1268   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1269   if (isseqaij) {
1270     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1271   } else {
1272     Mat work_mat;
1273     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1274     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1275     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1276   }
1277   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1278 #if !defined(PETSC_USE_COMPLEX)
1279   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1280 #endif
1281   /*
1282   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1283   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1284   */
1285   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 #undef __FUNCT__
1290 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1291 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1292 {
1293   PC_IS*          pcis = (PC_IS*)(pc->data);
1294   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1295   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1296   IS              is_aux1,is_aux2;
1297   PetscInt        *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1298   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1299   PetscInt        vbs,bs;
1300   PetscBT         bitmask;
1301   PetscErrorCode  ierr;
1302 
1303   PetscFunctionBegin;
1304   /*
1305     No need to setup local scatters if
1306       - primal space is unchanged
1307         AND
1308       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1309         AND
1310       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1311   */
1312   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1313     PetscFunctionReturn(0);
1314   }
1315   /* destroy old objects */
1316   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1317   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1318   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1319   /* Set Non-overlapping dimensions */
1320   n_B = pcis->n_B;
1321   n_D = pcis->n - n_B;
1322   n_vertices = pcbddc->n_vertices;
1323   /* create auxiliary bitmask */
1324   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1325   for (i=0;i<n_vertices;i++) {
1326     ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1327   }
1328 
1329   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1330   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1331   for (i=0, n_R=0; i<pcis->n; i++) {
1332     if (!PetscBTLookup(bitmask,i)) {
1333       idx_R_local[n_R] = i;
1334       n_R++;
1335     }
1336   }
1337 
1338   /* Block code */
1339   vbs = 1;
1340   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1341   if (bs>1 && !(n_vertices%bs)) {
1342     PetscBool is_blocked = PETSC_TRUE;
1343     PetscInt  *vary;
1344     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1345     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1346     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1347     for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1348     for (i=0; i<n_vertices/bs; i++) {
1349       if (vary[i]!=0 && vary[i]!=bs) {
1350         is_blocked = PETSC_FALSE;
1351         break;
1352       }
1353     }
1354     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1355       vbs = bs;
1356       for (i=0;i<n_R/vbs;i++) {
1357         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1358       }
1359     }
1360     ierr = PetscFree(vary);CHKERRQ(ierr);
1361   }
1362   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1363   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1364 
1365   /* print some info if requested */
1366   if (pcbddc->dbg_flag) {
1367     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1368     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1369     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1370     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1371     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1372     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1373     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1374   }
1375 
1376   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1377   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1378   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1379   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1380   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1381   for (i=0; i<n_D; i++) {
1382     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1383   }
1384   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1385   for (i=0, j=0; i<n_R; i++) {
1386     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1387       aux_array1[j++] = i;
1388     }
1389   }
1390   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1391   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1392   for (i=0, j=0; i<n_B; i++) {
1393     if (!PetscBTLookup(bitmask,is_indices[i])) {
1394       aux_array2[j++] = i;
1395     }
1396   }
1397   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1398   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1399   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1400   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1401   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1402 
1403   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1404     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1405     for (i=0, j=0; i<n_R; i++) {
1406       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1407         aux_array1[j++] = i;
1408       }
1409     }
1410     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1411     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1412     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1413   }
1414   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1415   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1416 
1417   /* auxiliary scatters are needed if we are reusing MUMPS Schur solver */
1418   if (sub_schurs->reuse_mumps) {
1419     PCBDDCReuseMumps       reuse_mumps = sub_schurs->reuse_mumps;
1420     Vec                    tvec;
1421     PetscInt               schur_size;
1422     ISLocalToGlobalMapping R_to_N;
1423     IS                     list[2],tisB,tisB_N,tisR_R,tisR;
1424 
1425     ierr = VecScatterDestroy(&reuse_mumps->correction_scatter_B);CHKERRQ(ierr);
1426     ierr = VecScatterDestroy(&reuse_mumps->correction_scatter_R);CHKERRQ(ierr);
1427     ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&R_to_N);CHKERRQ(ierr);
1428     if (sub_schurs->is_dir) {
1429       list[0] = sub_schurs->is_Ej_all;
1430       list[1] = sub_schurs->is_dir;
1431       ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&tisB);CHKERRQ(ierr);
1432     } else {
1433       ierr = ISDuplicate(sub_schurs->is_Ej_all,&tisB);CHKERRQ(ierr);
1434     }
1435     ierr = ISGetLocalSize(tisB,&schur_size);CHKERRQ(ierr);
1436     ierr = VecCreate(PETSC_COMM_SELF,&tvec);CHKERRQ(ierr);
1437     ierr = VecSetSizes(tvec,PETSC_DECIDE,schur_size);CHKERRQ(ierr);
1438     ierr = VecSetType(tvec,VECSEQ);CHKERRQ(ierr);
1439     ierr = VecScatterCreate(pcis->vec1_B,tisB,tvec,NULL,&reuse_mumps->correction_scatter_B);CHKERRQ(ierr);
1440     ierr = VecDestroy(&tvec);CHKERRQ(ierr);
1441     ierr = ISLocalToGlobalMappingApplyIS(sub_schurs->BtoNmap,tisB,&tisB_N);CHKERRQ(ierr);
1442     ierr = ISDestroy(&tisB);CHKERRQ(ierr);
1443     list[0] = pcis->is_I_local;
1444     list[1] = tisB_N;
1445     ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&tisR);CHKERRQ(ierr);
1446     ierr = ISDestroy(&tisB_N);CHKERRQ(ierr);
1447     ierr = ISGlobalToLocalMappingApplyIS(R_to_N,IS_GTOLM_DROP,tisR,&tisR_R);CHKERRQ(ierr);
1448     ierr = ISDestroy(&tisR);CHKERRQ(ierr);
1449     ierr = VecScatterCreate(pcbddc->vec1_R,tisR_R,pcbddc->vec1_R,NULL,&reuse_mumps->correction_scatter_R);CHKERRQ(ierr);
1450     ierr = ISDestroy(&tisR_R);CHKERRQ(ierr);
1451     ierr = ISLocalToGlobalMappingDestroy(&R_to_N);CHKERRQ(ierr);
1452   }
1453   PetscFunctionReturn(0);
1454 }
1455 
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1459 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1460 {
1461   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1462   PC_IS          *pcis = (PC_IS*)pc->data;
1463   PC             pc_temp;
1464   Mat            A_RR;
1465   MatReuse       reuse;
1466   PetscScalar    m_one = -1.0;
1467   PetscReal      value;
1468   PetscInt       n_D,n_R,ibs,mbs;
1469   PetscBool      use_exact,use_exact_reduced,issbaij;
1470   PetscErrorCode ierr;
1471   /* prefixes stuff */
1472   char           dir_prefix[256],neu_prefix[256],str_level[16];
1473   size_t         len;
1474 
1475   PetscFunctionBegin;
1476 
1477   /* compute prefixes */
1478   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1479   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1480   if (!pcbddc->current_level) {
1481     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1482     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1483     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1484     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1485   } else {
1486     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1487     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1488     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1489     len -= 15; /* remove "pc_bddc_coarse_" */
1490     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1491     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1492     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1493     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1494     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1495     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1496     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1497     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1498   }
1499 
1500   /* DIRICHLET PROBLEM */
1501   if (dirichlet) {
1502     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1503     if (pcbddc->issym) {
1504       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1505     }
1506     /* Matrix for Dirichlet problem is pcis->A_II */
1507     n_D = pcis->n - pcis->n_B;
1508     if (!pcbddc->ksp_D) { /* create object if not yet build */
1509       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1510       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1511       /* default */
1512       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1513       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1514       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1515       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1516       if (issbaij) {
1517         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1518       } else {
1519         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1520       }
1521       /* Allow user's customization */
1522       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1523       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1524     }
1525     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1526     if (sub_schurs->reuse_mumps) {
1527       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1528 
1529       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1530     }
1531     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1532     if (!n_D) {
1533       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1534       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1535     }
1536     /* Set Up KSP for Dirichlet problem of BDDC */
1537     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1538     /* set ksp_D into pcis data */
1539     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1540     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1541     pcis->ksp_D = pcbddc->ksp_D;
1542   }
1543 
1544   /* NEUMANN PROBLEM */
1545   A_RR = 0;
1546   if (neumann) {
1547     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1548     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1549     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1550     if (pcbddc->ksp_R) { /* already created ksp */
1551       PetscInt nn_R;
1552       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1553       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1554       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1555       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1556         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1557         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1558         reuse = MAT_INITIAL_MATRIX;
1559       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1560         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1561           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1562           reuse = MAT_INITIAL_MATRIX;
1563         } else { /* safe to reuse the matrix */
1564           reuse = MAT_REUSE_MATRIX;
1565         }
1566       }
1567       /* last check */
1568       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1569         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1570         reuse = MAT_INITIAL_MATRIX;
1571       }
1572     } else { /* first time, so we need to create the matrix */
1573       reuse = MAT_INITIAL_MATRIX;
1574     }
1575     if (!pcbddc->ksp_R) { /* create object if not present */
1576       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1577       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1578       /* default */
1579       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1580       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1581       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1582       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1583       if (issbaij) {
1584         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1585       } else {
1586         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1587       }
1588       /* Allow user's customization */
1589       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1590       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1591     }
1592     /* extract A_RR */
1593     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1594     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1595     if (ibs != mbs) {
1596       Mat newmat;
1597       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1598       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1599       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1600     } else {
1601       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1602     }
1603     if (pcbddc->issym) {
1604       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1605     }
1606     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1607     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1608     if (!n_R) {
1609       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1610       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1611     }
1612     /* Reuse MUMPS solver if it is present */
1613     if (sub_schurs->reuse_mumps) {
1614       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1615 
1616       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1617     }
1618     /* Set Up KSP for Neumann problem of BDDC */
1619     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1620   }
1621 
1622   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1623   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1624     if (pcbddc->dbg_flag) {
1625       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1626       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1627       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1628     }
1629     if (dirichlet) { /* Dirichlet */
1630       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1631       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1632       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1633       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1634       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1635       /* need to be adapted? */
1636       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1637       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1638       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1639       /* print info */
1640       if (pcbddc->dbg_flag) {
1641         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1642         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1643       }
1644       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1645         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1646       }
1647     }
1648     if (neumann) { /* Neumann */
1649       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1650       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1651       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1652       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1653       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1654       /* need to be adapted? */
1655       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1656       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1657       /* print info */
1658       if (pcbddc->dbg_flag) {
1659         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1660         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1661       }
1662       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1663         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1664       }
1665     }
1666   }
1667   /* free Neumann problem's matrix */
1668   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 #undef __FUNCT__
1673 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1674 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1675 {
1676   PetscErrorCode ierr;
1677   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1678 
1679   PetscFunctionBegin;
1680   ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1681   if (!pcbddc->switch_static) {
1682     if (applytranspose && pcbddc->local_auxmat1) {
1683       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1684       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1685     }
1686     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1687     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1688   } else {
1689     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1690     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1691     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1692     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1693     if (applytranspose && pcbddc->local_auxmat1) {
1694       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1695       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1696       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1697       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1698     }
1699   }
1700   if (applytranspose) {
1701     ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1702   } else {
1703     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1704   }
1705   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1706   if (!pcbddc->switch_static) {
1707     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1708     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1709     if (!applytranspose && pcbddc->local_auxmat1) {
1710       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1711       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1712     }
1713   } else {
1714     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1715     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1716     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1717     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1718     if (!applytranspose && pcbddc->local_auxmat1) {
1719       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1720       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1721     }
1722     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1723     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1724     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1725     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1726   }
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1731 #undef __FUNCT__
1732 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1733 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1734 {
1735   PetscErrorCode ierr;
1736   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1737   PC_IS*            pcis = (PC_IS*)  (pc->data);
1738   const PetscScalar zero = 0.0;
1739 
1740   PetscFunctionBegin;
1741   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1742   if (applytranspose) {
1743     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1744     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1745   } else {
1746     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1747     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1748   }
1749   /* start communications from local primal nodes to rhs of coarse solver */
1750   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1751   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1752   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1753 
1754   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1755   /* TODO remove null space when doing multilevel */
1756   if (pcbddc->coarse_ksp) {
1757     Vec rhs,sol;
1758 
1759     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1760     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1761     if (applytranspose) {
1762       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1763     } else {
1764       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1765     }
1766   }
1767 
1768   /* Local solution on R nodes */
1769   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1770     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1771   }
1772 
1773   /* communications from coarse sol to local primal nodes */
1774   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1775   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1776 
1777   /* Sum contributions from two levels */
1778   if (applytranspose) {
1779     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1780     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1781   } else {
1782     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1783     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1784   }
1785   PetscFunctionReturn(0);
1786 }
1787 
1788 #undef __FUNCT__
1789 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1790 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1791 {
1792   PetscErrorCode ierr;
1793   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1794   PetscScalar    *array;
1795   Vec            from,to;
1796 
1797   PetscFunctionBegin;
1798   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1799     from = pcbddc->coarse_vec;
1800     to = pcbddc->vec1_P;
1801     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1802       Vec tvec;
1803 
1804       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1805       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1806       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1807       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1808       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1809       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1810     }
1811   } else { /* from local to global -> put data in coarse right hand side */
1812     from = pcbddc->vec1_P;
1813     to = pcbddc->coarse_vec;
1814   }
1815   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1821 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1822 {
1823   PetscErrorCode ierr;
1824   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1825   PetscScalar    *array;
1826   Vec            from,to;
1827 
1828   PetscFunctionBegin;
1829   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1830     from = pcbddc->coarse_vec;
1831     to = pcbddc->vec1_P;
1832   } else { /* from local to global -> put data in coarse right hand side */
1833     from = pcbddc->vec1_P;
1834     to = pcbddc->coarse_vec;
1835   }
1836   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1837   if (smode == SCATTER_FORWARD) {
1838     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1839       Vec tvec;
1840 
1841       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1842       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1843       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1844       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1845     }
1846   } else {
1847     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1848      ierr = VecResetArray(from);CHKERRQ(ierr);
1849     }
1850   }
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 /* uncomment for testing purposes */
1855 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1856 #undef __FUNCT__
1857 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1858 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1859 {
1860   PetscErrorCode    ierr;
1861   PC_IS*            pcis = (PC_IS*)(pc->data);
1862   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1863   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1864   /* one and zero */
1865   PetscScalar       one=1.0,zero=0.0;
1866   /* space to store constraints and their local indices */
1867   PetscScalar       *constraints_data;
1868   PetscInt          *constraints_idxs,*constraints_idxs_B;
1869   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1870   PetscInt          *constraints_n;
1871   /* iterators */
1872   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1873   /* BLAS integers */
1874   PetscBLASInt      lwork,lierr;
1875   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1876   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1877   /* reuse */
1878   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1879   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1880   /* change of basis */
1881   PetscBool         qr_needed;
1882   PetscBT           change_basis,qr_needed_idx;
1883   /* auxiliary stuff */
1884   PetscInt          *nnz,*is_indices;
1885   PetscInt          ncc;
1886   /* some quantities */
1887   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1888   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1889 
1890   PetscFunctionBegin;
1891   /* Destroy Mat objects computed previously */
1892   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1893   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1894   /* save info on constraints from previous setup (if any) */
1895   olocal_primal_size = pcbddc->local_primal_size;
1896   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1897   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
1898   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1899   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1900   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1901   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1902 
1903   /* print some info */
1904   if (pcbddc->dbg_flag) {
1905     IS       vertices;
1906     PetscInt nv,nedges,nfaces;
1907     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1908     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1909     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1910     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1911     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1912     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1913     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1914     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1915     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1916   }
1917 
1918   if (!pcbddc->adaptive_selection) {
1919     IS           ISForVertices,*ISForFaces,*ISForEdges;
1920     MatNullSpace nearnullsp;
1921     const Vec    *nearnullvecs;
1922     Vec          *localnearnullsp;
1923     PetscScalar  *array;
1924     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1925     PetscBool    nnsp_has_cnst;
1926     /* LAPACK working arrays for SVD or POD */
1927     PetscBool    skip_lapack,boolforchange;
1928     PetscScalar  *work;
1929     PetscReal    *singular_vals;
1930 #if defined(PETSC_USE_COMPLEX)
1931     PetscReal    *rwork;
1932 #endif
1933 #if defined(PETSC_MISSING_LAPACK_GESVD)
1934     PetscScalar  *temp_basis,*correlation_mat;
1935 #else
1936     PetscBLASInt dummy_int=1;
1937     PetscScalar  dummy_scalar=1.;
1938 #endif
1939 
1940     /* Get index sets for faces, edges and vertices from graph */
1941     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1942     /* free unneeded index sets */
1943     if (!pcbddc->use_vertices) {
1944       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1945     }
1946     if (!pcbddc->use_edges) {
1947       for (i=0;i<n_ISForEdges;i++) {
1948         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1949       }
1950       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1951       n_ISForEdges = 0;
1952     }
1953     if (!pcbddc->use_faces) {
1954       for (i=0;i<n_ISForFaces;i++) {
1955         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1956       }
1957       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1958       n_ISForFaces = 0;
1959     }
1960 
1961 #if defined(PETSC_USE_DEBUG)
1962     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
1963        Also use_change_of_basis should be consistent among processors */
1964     if (pcbddc->NullSpace) {
1965       PetscBool tbool[2],gbool[2];
1966 
1967       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
1968         pcbddc->use_change_of_basis = PETSC_TRUE;
1969         if (!ISForEdges) {
1970           pcbddc->use_change_on_faces = PETSC_TRUE;
1971         }
1972       }
1973       tbool[0] = pcbddc->use_change_of_basis;
1974       tbool[1] = pcbddc->use_change_on_faces;
1975       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1976       pcbddc->use_change_of_basis = gbool[0];
1977       pcbddc->use_change_on_faces = gbool[1];
1978     }
1979 #endif
1980 
1981     /* check if near null space is attached to global mat */
1982     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1983     if (nearnullsp) {
1984       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1985       /* remove any stored info */
1986       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1987       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1988       /* store information for BDDC solver reuse */
1989       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1990       pcbddc->onearnullspace = nearnullsp;
1991       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1992       for (i=0;i<nnsp_size;i++) {
1993         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1994       }
1995     } else { /* if near null space is not provided BDDC uses constants by default */
1996       nnsp_size = 0;
1997       nnsp_has_cnst = PETSC_TRUE;
1998     }
1999     /* get max number of constraints on a single cc */
2000     max_constraints = nnsp_size;
2001     if (nnsp_has_cnst) max_constraints++;
2002 
2003     /*
2004          Evaluate maximum storage size needed by the procedure
2005          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2006          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2007          There can be multiple constraints per connected component
2008                                                                                                                                                            */
2009     n_vertices = 0;
2010     if (ISForVertices) {
2011       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2012     }
2013     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2014     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2015 
2016     total_counts = n_ISForFaces+n_ISForEdges;
2017     total_counts *= max_constraints;
2018     total_counts += n_vertices;
2019     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2020 
2021     total_counts = 0;
2022     max_size_of_constraint = 0;
2023     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2024       IS used_is;
2025       if (i<n_ISForEdges) {
2026         used_is = ISForEdges[i];
2027       } else {
2028         used_is = ISForFaces[i-n_ISForEdges];
2029       }
2030       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2031       total_counts += j;
2032       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2033     }
2034     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2035 
2036     /* get local part of global near null space vectors */
2037     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2038     for (k=0;k<nnsp_size;k++) {
2039       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2040       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2041       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2042     }
2043 
2044     /* whether or not to skip lapack calls */
2045     skip_lapack = PETSC_TRUE;
2046     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2047 
2048     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2049     if (!skip_lapack) {
2050       PetscScalar temp_work;
2051 
2052 #if defined(PETSC_MISSING_LAPACK_GESVD)
2053       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2054       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2055       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2056       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2057 #if defined(PETSC_USE_COMPLEX)
2058       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2059 #endif
2060       /* now we evaluate the optimal workspace using query with lwork=-1 */
2061       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2062       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2063       lwork = -1;
2064       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2065 #if !defined(PETSC_USE_COMPLEX)
2066       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2067 #else
2068       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2069 #endif
2070       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2071       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2072 #else /* on missing GESVD */
2073       /* SVD */
2074       PetscInt max_n,min_n;
2075       max_n = max_size_of_constraint;
2076       min_n = max_constraints;
2077       if (max_size_of_constraint < max_constraints) {
2078         min_n = max_size_of_constraint;
2079         max_n = max_constraints;
2080       }
2081       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2082 #if defined(PETSC_USE_COMPLEX)
2083       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2084 #endif
2085       /* now we evaluate the optimal workspace using query with lwork=-1 */
2086       lwork = -1;
2087       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2088       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2089       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2090       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2091 #if !defined(PETSC_USE_COMPLEX)
2092       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2093 #else
2094       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2095 #endif
2096       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2097       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2098 #endif /* on missing GESVD */
2099       /* Allocate optimal workspace */
2100       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2101       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2102     }
2103     /* Now we can loop on constraining sets */
2104     total_counts = 0;
2105     constraints_idxs_ptr[0] = 0;
2106     constraints_data_ptr[0] = 0;
2107     /* vertices */
2108     if (n_vertices) {
2109       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2110       if (nnsp_has_cnst) { /* it considers all possible vertices */
2111         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2112         for (i=0;i<n_vertices;i++) {
2113           constraints_n[total_counts] = 1;
2114           constraints_data[total_counts] = 1.0;
2115           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2116           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2117           total_counts++;
2118         }
2119       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2120         PetscBool used_vertex;
2121         for (i=0;i<n_vertices;i++) {
2122           used_vertex = PETSC_FALSE;
2123           k = 0;
2124           while (!used_vertex && k<nnsp_size) {
2125             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2126             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2127               constraints_n[total_counts] = 1;
2128               constraints_idxs[total_counts] = is_indices[i];
2129               constraints_data[total_counts] = 1.0;
2130               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2131               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2132               total_counts++;
2133               used_vertex = PETSC_TRUE;
2134             }
2135             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2136             k++;
2137           }
2138         }
2139       }
2140       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2141       n_vertices = total_counts;
2142     }
2143 
2144     /* edges and faces */
2145     total_counts_cc = total_counts;
2146     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2147       IS        used_is;
2148       PetscBool idxs_copied = PETSC_FALSE;
2149 
2150       if (ncc<n_ISForEdges) {
2151         used_is = ISForEdges[ncc];
2152         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2153       } else {
2154         used_is = ISForFaces[ncc-n_ISForEdges];
2155         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2156       }
2157       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2158 
2159       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2160       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2161       /* change of basis should not be performed on local periodic nodes */
2162       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2163       if (nnsp_has_cnst) {
2164         PetscScalar quad_value;
2165 
2166         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2167         idxs_copied = PETSC_TRUE;
2168 
2169         if (!pcbddc->use_nnsp_true) {
2170           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2171         } else {
2172           quad_value = 1.0;
2173         }
2174         for (j=0;j<size_of_constraint;j++) {
2175           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2176         }
2177         temp_constraints++;
2178         total_counts++;
2179       }
2180       for (k=0;k<nnsp_size;k++) {
2181         PetscReal real_value;
2182         PetscScalar *ptr_to_data;
2183 
2184         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2185         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2186         for (j=0;j<size_of_constraint;j++) {
2187           ptr_to_data[j] = array[is_indices[j]];
2188         }
2189         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2190         /* check if array is null on the connected component */
2191         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2192         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2193         if (real_value > 0.0) { /* keep indices and values */
2194           temp_constraints++;
2195           total_counts++;
2196           if (!idxs_copied) {
2197             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2198             idxs_copied = PETSC_TRUE;
2199           }
2200         }
2201       }
2202       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2203       valid_constraints = temp_constraints;
2204       if (!pcbddc->use_nnsp_true && temp_constraints) {
2205         if (temp_constraints == 1) { /* just normalize the constraint */
2206           PetscScalar norm,*ptr_to_data;
2207 
2208           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2209           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2210           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2211           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2212           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2213         } else { /* perform SVD */
2214           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2215           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2216 
2217 #if defined(PETSC_MISSING_LAPACK_GESVD)
2218           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2219              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2220              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2221                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2222                 from that computed using LAPACKgesvd
2223              -> This is due to a different computation of eigenvectors in LAPACKheev
2224              -> The quality of the POD-computed basis will be the same */
2225           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2226           /* Store upper triangular part of correlation matrix */
2227           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2228           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2229           for (j=0;j<temp_constraints;j++) {
2230             for (k=0;k<j+1;k++) {
2231               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2232             }
2233           }
2234           /* compute eigenvalues and eigenvectors of correlation matrix */
2235           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2236           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2237 #if !defined(PETSC_USE_COMPLEX)
2238           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2239 #else
2240           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2241 #endif
2242           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2243           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2244           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2245           j = 0;
2246           while (j < temp_constraints && singular_vals[j] < tol) j++;
2247           total_counts = total_counts-j;
2248           valid_constraints = temp_constraints-j;
2249           /* scale and copy POD basis into used quadrature memory */
2250           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2251           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2252           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2253           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2254           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2255           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2256           if (j<temp_constraints) {
2257             PetscInt ii;
2258             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2259             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2260             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2261             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2262             for (k=0;k<temp_constraints-j;k++) {
2263               for (ii=0;ii<size_of_constraint;ii++) {
2264                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2265               }
2266             }
2267           }
2268 #else  /* on missing GESVD */
2269           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2270           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2271           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2272           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2273 #if !defined(PETSC_USE_COMPLEX)
2274           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2275 #else
2276           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2277 #endif
2278           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2279           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2280           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2281           k = temp_constraints;
2282           if (k > size_of_constraint) k = size_of_constraint;
2283           j = 0;
2284           while (j < k && singular_vals[k-j-1] < tol) j++;
2285           valid_constraints = k-j;
2286           total_counts = total_counts-temp_constraints+valid_constraints;
2287 #endif /* on missing GESVD */
2288         }
2289       }
2290       /* update pointers information */
2291       if (valid_constraints) {
2292         constraints_n[total_counts_cc] = valid_constraints;
2293         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2294         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2295         /* set change_of_basis flag */
2296         if (boolforchange) {
2297           PetscBTSet(change_basis,total_counts_cc);
2298         }
2299         total_counts_cc++;
2300       }
2301     }
2302     /* free workspace */
2303     if (!skip_lapack) {
2304       ierr = PetscFree(work);CHKERRQ(ierr);
2305 #if defined(PETSC_USE_COMPLEX)
2306       ierr = PetscFree(rwork);CHKERRQ(ierr);
2307 #endif
2308       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2309 #if defined(PETSC_MISSING_LAPACK_GESVD)
2310       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2311       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2312 #endif
2313     }
2314     for (k=0;k<nnsp_size;k++) {
2315       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2316     }
2317     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2318     /* free index sets of faces, edges and vertices */
2319     for (i=0;i<n_ISForFaces;i++) {
2320       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2321     }
2322     if (n_ISForFaces) {
2323       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2324     }
2325     for (i=0;i<n_ISForEdges;i++) {
2326       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2327     }
2328     if (n_ISForEdges) {
2329       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2330     }
2331     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2332   } else {
2333     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2334 
2335     total_counts = 0;
2336     n_vertices = 0;
2337     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2338       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2339     }
2340     max_constraints = 0;
2341     total_counts_cc = 0;
2342     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2343       total_counts += pcbddc->adaptive_constraints_n[i];
2344       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2345       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2346     }
2347     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2348     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2349     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2350     constraints_data = pcbddc->adaptive_constraints_data;
2351     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2352     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2353     total_counts_cc = 0;
2354     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2355       if (pcbddc->adaptive_constraints_n[i]) {
2356         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2357       }
2358     }
2359 #if 0
2360     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2361     for (i=0;i<total_counts_cc;i++) {
2362       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2363       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2364       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2365         printf(" %d",constraints_idxs[j]);
2366       }
2367       printf("\n");
2368       printf("number of cc: %d\n",constraints_n[i]);
2369     }
2370     for (i=0;i<n_vertices;i++) {
2371       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2372     }
2373     for (i=0;i<sub_schurs->n_subs;i++) {
2374       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2375     }
2376 #endif
2377 
2378     max_size_of_constraint = 0;
2379     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2380     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2381     /* Change of basis */
2382     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2383     if (pcbddc->use_change_of_basis) {
2384       for (i=0;i<sub_schurs->n_subs;i++) {
2385         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2386           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2387         }
2388       }
2389     }
2390   }
2391   pcbddc->local_primal_size = total_counts;
2392   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2393 
2394   /* map constraints_idxs in boundary numbering */
2395   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2396   if (i != constraints_idxs_ptr[total_counts_cc]) {
2397     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2398   }
2399 
2400   /* Create constraint matrix */
2401   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2402   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2403   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2404 
2405   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2406   /* determine if a QR strategy is needed for change of basis */
2407   qr_needed = PETSC_FALSE;
2408   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2409   total_primal_vertices=0;
2410   pcbddc->local_primal_size_cc = 0;
2411   for (i=0;i<total_counts_cc;i++) {
2412     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2413     if (size_of_constraint == 1) {
2414       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2415       pcbddc->local_primal_size_cc += 1;
2416     } else if (PetscBTLookup(change_basis,i)) {
2417       for (k=0;k<constraints_n[i];k++) {
2418         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2419       }
2420       pcbddc->local_primal_size_cc += constraints_n[i];
2421       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2422         PetscBTSet(qr_needed_idx,i);
2423         qr_needed = PETSC_TRUE;
2424       }
2425     } else {
2426       pcbddc->local_primal_size_cc += 1;
2427     }
2428   }
2429   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2430   pcbddc->n_vertices = total_primal_vertices;
2431   /* permute indices in order to have a sorted set of vertices */
2432   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2433 
2434   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2435   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2436   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2437 
2438   /* nonzero structure of constraint matrix */
2439   /* and get reference dof for local constraints */
2440   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2441   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2442 
2443   j = total_primal_vertices;
2444   total_counts = total_primal_vertices;
2445   cum = total_primal_vertices;
2446   for (i=n_vertices;i<total_counts_cc;i++) {
2447     if (!PetscBTLookup(change_basis,i)) {
2448       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2449       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2450       cum++;
2451       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2452       for (k=0;k<constraints_n[i];k++) {
2453         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2454         nnz[j+k] = size_of_constraint;
2455       }
2456       j += constraints_n[i];
2457     }
2458   }
2459   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2460   ierr = PetscFree(nnz);CHKERRQ(ierr);
2461 
2462   /* set values in constraint matrix */
2463   for (i=0;i<total_primal_vertices;i++) {
2464     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2465   }
2466   total_counts = total_primal_vertices;
2467   for (i=n_vertices;i<total_counts_cc;i++) {
2468     if (!PetscBTLookup(change_basis,i)) {
2469       PetscInt *cols;
2470 
2471       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2472       cols = constraints_idxs+constraints_idxs_ptr[i];
2473       for (k=0;k<constraints_n[i];k++) {
2474         PetscInt    row = total_counts+k;
2475         PetscScalar *vals;
2476 
2477         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2478         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2479       }
2480       total_counts += constraints_n[i];
2481     }
2482   }
2483   /* assembling */
2484   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2485   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2486 
2487   /*
2488   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2489   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2490   */
2491   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2492   if (pcbddc->use_change_of_basis) {
2493     /* dual and primal dofs on a single cc */
2494     PetscInt     dual_dofs,primal_dofs;
2495     /* working stuff for GEQRF */
2496     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2497     PetscBLASInt lqr_work;
2498     /* working stuff for UNGQR */
2499     PetscScalar  *gqr_work,lgqr_work_t;
2500     PetscBLASInt lgqr_work;
2501     /* working stuff for TRTRS */
2502     PetscScalar  *trs_rhs;
2503     PetscBLASInt Blas_NRHS;
2504     /* pointers for values insertion into change of basis matrix */
2505     PetscInt     *start_rows,*start_cols;
2506     PetscScalar  *start_vals;
2507     /* working stuff for values insertion */
2508     PetscBT      is_primal;
2509     PetscInt     *aux_primal_numbering_B;
2510     /* matrix sizes */
2511     PetscInt     global_size,local_size;
2512     /* temporary change of basis */
2513     Mat          localChangeOfBasisMatrix;
2514     /* extra space for debugging */
2515     PetscScalar  *dbg_work;
2516 
2517     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2518     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2519     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2520     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2521     /* nonzeros for local mat */
2522     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2523     for (i=0;i<pcis->n;i++) nnz[i]=1;
2524     for (i=n_vertices;i<total_counts_cc;i++) {
2525       if (PetscBTLookup(change_basis,i)) {
2526         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2527         if (PetscBTLookup(qr_needed_idx,i)) {
2528           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2529         } else {
2530           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2531           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2532         }
2533       }
2534     }
2535     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2536     ierr = PetscFree(nnz);CHKERRQ(ierr);
2537     /* Set initial identity in the matrix */
2538     for (i=0;i<pcis->n;i++) {
2539       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2540     }
2541 
2542     if (pcbddc->dbg_flag) {
2543       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2544       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2545     }
2546 
2547 
2548     /* Now we loop on the constraints which need a change of basis */
2549     /*
2550        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2551        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2552 
2553        Basic blocks of change of basis matrix T computed by
2554 
2555           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2556 
2557             | 1        0   ...        0         s_1/S |
2558             | 0        1   ...        0         s_2/S |
2559             |              ...                        |
2560             | 0        ...            1     s_{n-1}/S |
2561             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2562 
2563             with S = \sum_{i=1}^n s_i^2
2564             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2565                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2566 
2567           - QR decomposition of constraints otherwise
2568     */
2569     if (qr_needed) {
2570       /* space to store Q */
2571       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2572       /* first we issue queries for optimal work */
2573       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2574       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2575       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2576       lqr_work = -1;
2577       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2578       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2579       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2580       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2581       lgqr_work = -1;
2582       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2583       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2584       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2585       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2586       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2587       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2588       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2589       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2590       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2591       /* array to store scaling factors for reflectors */
2592       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2593       /* array to store rhs and solution of triangular solver */
2594       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2595       /* allocating workspace for check */
2596       if (pcbddc->dbg_flag) {
2597         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2598       }
2599     }
2600     /* array to store whether a node is primal or not */
2601     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2602     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2603     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2604     if (i != total_primal_vertices) {
2605       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2606     }
2607     for (i=0;i<total_primal_vertices;i++) {
2608       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2609     }
2610     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2611 
2612     /* loop on constraints and see whether or not they need a change of basis and compute it */
2613     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2614       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2615       if (PetscBTLookup(change_basis,total_counts)) {
2616         /* get constraint info */
2617         primal_dofs = constraints_n[total_counts];
2618         dual_dofs = size_of_constraint-primal_dofs;
2619 
2620         if (pcbddc->dbg_flag) {
2621           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2622         }
2623 
2624         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2625 
2626           /* copy quadrature constraints for change of basis check */
2627           if (pcbddc->dbg_flag) {
2628             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2629           }
2630           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2631           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2632 
2633           /* compute QR decomposition of constraints */
2634           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2635           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2636           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2637           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2638           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2639           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2640           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2641 
2642           /* explictly compute R^-T */
2643           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2644           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2645           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2646           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2647           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2648           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2649           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2650           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2651           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2652           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2653 
2654           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2655           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2656           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2657           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2658           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2659           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2660           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2661           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2662           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2663 
2664           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2665              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2666              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2667           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2668           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2669           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2670           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2671           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2672           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2673           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2674           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2675           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2676           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2677 
2678           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2679           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2680           /* insert cols for primal dofs */
2681           for (j=0;j<primal_dofs;j++) {
2682             start_vals = &qr_basis[j*size_of_constraint];
2683             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2684             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2685           }
2686           /* insert cols for dual dofs */
2687           for (j=0,k=0;j<dual_dofs;k++) {
2688             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2689               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2690               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2691               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2692               j++;
2693             }
2694           }
2695 
2696           /* check change of basis */
2697           if (pcbddc->dbg_flag) {
2698             PetscInt   ii,jj;
2699             PetscBool valid_qr=PETSC_TRUE;
2700             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2701             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2702             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2703             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2704             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2705             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2706             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2707             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2708             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2709             for (jj=0;jj<size_of_constraint;jj++) {
2710               for (ii=0;ii<primal_dofs;ii++) {
2711                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2712                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2713               }
2714             }
2715             if (!valid_qr) {
2716               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2717               for (jj=0;jj<size_of_constraint;jj++) {
2718                 for (ii=0;ii<primal_dofs;ii++) {
2719                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2720                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2721                   }
2722                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2723                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2724                   }
2725                 }
2726               }
2727             } else {
2728               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2729             }
2730           }
2731         } else { /* simple transformation block */
2732           PetscInt    row,col;
2733           PetscScalar val,norm;
2734 
2735           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2736           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2737           for (j=0;j<size_of_constraint;j++) {
2738             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2739             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2740             if (!PetscBTLookup(is_primal,row_B)) {
2741               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2742               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2743               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2744             } else {
2745               for (k=0;k<size_of_constraint;k++) {
2746                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2747                 if (row != col) {
2748                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2749                 } else {
2750                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2751                 }
2752                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2753               }
2754             }
2755           }
2756           if (pcbddc->dbg_flag) {
2757             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2758           }
2759         }
2760       } else {
2761         if (pcbddc->dbg_flag) {
2762           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2763         }
2764       }
2765     }
2766 
2767     /* free workspace */
2768     if (qr_needed) {
2769       if (pcbddc->dbg_flag) {
2770         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2771       }
2772       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2773       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2774       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2775       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2776       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2777     }
2778     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2779     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2780     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2781 
2782     /* assembling of global change of variable */
2783     {
2784       Mat      tmat;
2785       PetscInt bs;
2786 
2787       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2788       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2789       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2790       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2791       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2792       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2793       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2794       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2795       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2796       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2797       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2798       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2799       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2800       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2801       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2802       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2803       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2804       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2805     }
2806     /* check */
2807     if (pcbddc->dbg_flag) {
2808       PetscReal error;
2809       Vec       x,x_change;
2810 
2811       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2812       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2813       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2814       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2815       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2816       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2817       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2818       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2819       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2820       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2821       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2822       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2823       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2824       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2825       ierr = VecDestroy(&x);CHKERRQ(ierr);
2826       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2827     }
2828 
2829     /* adapt sub_schurs computed (if any) */
2830     if (pcbddc->use_deluxe_scaling) {
2831       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2832       if (sub_schurs->S_Ej_all) {
2833         Mat S_new,tmat;
2834         IS is_all_N;
2835 
2836         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2837         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2838         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2839         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2840         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2841         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2842         sub_schurs->S_Ej_all = S_new;
2843         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2844         if (sub_schurs->sum_S_Ej_all) {
2845           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2846           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2847           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2848           sub_schurs->sum_S_Ej_all = S_new;
2849           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2850         }
2851         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2852       }
2853     }
2854     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2855   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2856     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2857     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2858   }
2859 
2860   /* set up change of basis context */
2861   if (pcbddc->ChangeOfBasisMatrix) {
2862     PCBDDCChange_ctx change_ctx;
2863 
2864     if (!pcbddc->new_global_mat) {
2865       PetscInt global_size,local_size;
2866 
2867       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2868       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2869       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2870       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2871       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2872       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2873       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2874       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2875       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2876     } else {
2877       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2878       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2879       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2880     }
2881     if (!pcbddc->user_ChangeOfBasisMatrix) {
2882       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2883       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2884     } else {
2885       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2886       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2887     }
2888     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2889     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2890     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2891     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2892   }
2893 
2894   /* check if a new primal space has been introduced */
2895   pcbddc->new_primal_space_local = PETSC_TRUE;
2896   if (olocal_primal_size == pcbddc->local_primal_size) {
2897     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2898     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2899     if (!pcbddc->new_primal_space_local) {
2900       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2901       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2902     }
2903   }
2904   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
2905   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2906   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2907 
2908   /* flush dbg viewer */
2909   if (pcbddc->dbg_flag) {
2910     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2911   }
2912 
2913   /* free workspace */
2914   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2915   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2916   if (!pcbddc->adaptive_selection) {
2917     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2918     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2919   } else {
2920     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2921                       pcbddc->adaptive_constraints_idxs_ptr,
2922                       pcbddc->adaptive_constraints_data_ptr,
2923                       pcbddc->adaptive_constraints_idxs,
2924                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2925     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
2926     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
2927   }
2928   PetscFunctionReturn(0);
2929 }
2930 
2931 #undef __FUNCT__
2932 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2933 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2934 {
2935   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2936   PC_IS       *pcis = (PC_IS*)pc->data;
2937   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2938   PetscInt    ierr,i,vertex_size,N;
2939   PetscViewer viewer=pcbddc->dbg_viewer;
2940 
2941   PetscFunctionBegin;
2942   /* Reset previously computed graph */
2943   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2944   /* Init local Graph struct */
2945   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2946   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
2947 
2948   /* Check validity of the csr graph passed in by the user */
2949   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2950     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2951   }
2952 
2953   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2954   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2955     PetscInt  *xadj,*adjncy;
2956     PetscInt  nvtxs;
2957     PetscBool flg_row=PETSC_FALSE;
2958 
2959     if (pcbddc->use_local_adj) {
2960 
2961       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2962       if (flg_row) {
2963         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2964         pcbddc->computed_rowadj = PETSC_TRUE;
2965       }
2966       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2967     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
2968       IS                     is_dummy;
2969       ISLocalToGlobalMapping l2gmap_dummy;
2970       PetscInt               j,sum;
2971       PetscInt               *cxadj,*cadjncy;
2972       const PetscInt         *idxs;
2973       PCBDDCGraph            graph;
2974       PetscBT                is_on_boundary;
2975 
2976       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2977       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2978       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2979       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2980       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
2981       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2982       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2983       if (flg_row) {
2984         graph->xadj = xadj;
2985         graph->adjncy = adjncy;
2986       }
2987       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2988       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2989       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2990 
2991       if (pcbddc->dbg_flag) {
2992         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
2993         for (i=0;i<graph->ncc;i++) {
2994           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2995         }
2996       }
2997 
2998       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2999       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3000       for (i=0;i<pcis->n_B;i++) {
3001         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3002       }
3003       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3004 
3005       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3006       sum = 0;
3007       for (i=0;i<graph->ncc;i++) {
3008         PetscInt sizecc = 0;
3009         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3010           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3011             sizecc++;
3012           }
3013         }
3014         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3015           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3016             cxadj[graph->queue[j]] = sizecc;
3017           }
3018         }
3019         sum += sizecc*sizecc;
3020       }
3021       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3022       sum = 0;
3023       for (i=0;i<pcis->n;i++) {
3024         PetscInt temp = cxadj[i];
3025         cxadj[i] = sum;
3026         sum += temp;
3027       }
3028       cxadj[pcis->n] = sum;
3029       for (i=0;i<graph->ncc;i++) {
3030         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3031           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3032             PetscInt k,sizecc = 0;
3033             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3034               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3035                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3036                 sizecc++;
3037               }
3038             }
3039           }
3040         }
3041       }
3042       if (sum) {
3043         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3044       } else {
3045         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3046         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3047       }
3048       graph->xadj = 0;
3049       graph->adjncy = 0;
3050       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3051       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3052     }
3053   }
3054   if (pcbddc->dbg_flag) {
3055     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3056   }
3057 
3058   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3059   vertex_size = 1;
3060   if (pcbddc->user_provided_isfordofs) {
3061     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3062       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3063       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3064         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3065         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3066       }
3067       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3068       pcbddc->n_ISForDofs = 0;
3069       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3070     }
3071     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3072     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3073   } else {
3074     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3075       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3076       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3077       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3078         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3079       }
3080     }
3081   }
3082 
3083   /* Setup of Graph */
3084   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3085     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3086   }
3087   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3088     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3089   }
3090   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3091 
3092   /* Graph's connected components analysis */
3093   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3094 
3095   /* print some info to stdout */
3096   if (pcbddc->dbg_flag) {
3097     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3098   }
3099 
3100   /* mark topography has done */
3101   pcbddc->recompute_topography = PETSC_FALSE;
3102   PetscFunctionReturn(0);
3103 }
3104 
3105 #undef __FUNCT__
3106 #define __FUNCT__ "PCBDDCSubsetNumbering"
3107 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3108 {
3109   Vec            local_vec,global_vec;
3110   IS             seqis,paris;
3111   VecScatter     scatter_ctx;
3112   PetscScalar    *array;
3113   PetscInt       *temp_global_dofs;
3114   PetscScalar    globalsum;
3115   PetscInt       i,j,s;
3116   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3117   PetscMPIInt    rank_prec_comm,size_prec_comm;
3118   PetscInt       *dof_sizes,*dof_displs;
3119   PetscBool      first_found;
3120   PetscErrorCode ierr;
3121 
3122   PetscFunctionBegin;
3123   /* mpi buffers */
3124   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3125   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3126   j = ( !rank_prec_comm ? size_prec_comm : 0);
3127   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3128   /* get maximum size of subset */
3129   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3130   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3131   max_local = 0;
3132   for (i=0;i<n_local_dofs;i++) {
3133     if (max_local < temp_global_dofs[i] ) {
3134       max_local = temp_global_dofs[i];
3135     }
3136   }
3137   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3138   max_global++;
3139   max_local = 0;
3140   for (i=0;i<n_local_dofs;i++) {
3141     if (max_local < local_dofs[i] ) {
3142       max_local = local_dofs[i];
3143     }
3144   }
3145   max_local++;
3146   /* allocate workspace */
3147   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3148   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3149   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3150   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3151   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3152   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3153   /* create scatter */
3154   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3155   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3156   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3157   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3158   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3159   /* init array */
3160   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3161   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3162   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3163   if (local_dofs_mult) {
3164     for (i=0;i<n_local_dofs;i++) {
3165       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3166     }
3167   } else {
3168     for (i=0;i<n_local_dofs;i++) {
3169       array[local_dofs[i]]=1.0;
3170     }
3171   }
3172   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3173   /* scatter into global vec and get total number of global dofs */
3174   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3175   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3176   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3177   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3178   /* Fill global_vec with cumulative function for global numbering */
3179   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3180   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3181   nlocals = 0;
3182   first_index = -1;
3183   first_found = PETSC_FALSE;
3184   for (i=0;i<s;i++) {
3185     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3186       first_found = PETSC_TRUE;
3187       first_index = i;
3188     }
3189     nlocals += (PetscInt)PetscRealPart(array[i]);
3190   }
3191   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3192   if (!rank_prec_comm) {
3193     dof_displs[0]=0;
3194     for (i=1;i<size_prec_comm;i++) {
3195       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3196     }
3197   }
3198   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3199   if (first_found) {
3200     array[first_index] += (PetscScalar)nlocals;
3201     old_index = first_index;
3202     for (i=first_index+1;i<s;i++) {
3203       if (PetscRealPart(array[i]) > 0.1) {
3204         array[i] += array[old_index];
3205         old_index = i;
3206       }
3207     }
3208   }
3209   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3210   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3211   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3212   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3213   /* get global ordering of local dofs */
3214   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3215   if (local_dofs_mult) {
3216     for (i=0;i<n_local_dofs;i++) {
3217       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3218     }
3219   } else {
3220     for (i=0;i<n_local_dofs;i++) {
3221       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3222     }
3223   }
3224   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3225   /* free workspace */
3226   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3227   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3228   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3229   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3230   /* return pointer to global ordering of local dofs */
3231   *global_numbering_subset = temp_global_dofs;
3232   PetscFunctionReturn(0);
3233 }
3234 
3235 #undef __FUNCT__
3236 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3237 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3238 {
3239   PetscInt       i,j;
3240   PetscScalar    *alphas;
3241   PetscErrorCode ierr;
3242 
3243   PetscFunctionBegin;
3244   /* this implements stabilized Gram-Schmidt */
3245   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3246   for (i=0;i<n;i++) {
3247     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3248     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3249     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3250   }
3251   ierr = PetscFree(alphas);CHKERRQ(ierr);
3252   PetscFunctionReturn(0);
3253 }
3254 
3255 #undef __FUNCT__
3256 #define __FUNCT__ "MatISGetSubassemblingPattern"
3257 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3258 {
3259   Mat             subdomain_adj;
3260   IS              new_ranks,ranks_send_to;
3261   MatPartitioning partitioner;
3262   Mat_IS          *matis;
3263   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3264   PetscInt        prank;
3265   PetscMPIInt     size,rank,color;
3266   PetscInt        *xadj,*adjncy,*oldranks;
3267   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3268   PetscInt        i,local_size,threshold=0;
3269   PetscErrorCode  ierr;
3270   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3271   PetscSubcomm    subcomm;
3272 
3273   PetscFunctionBegin;
3274   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3275   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3276   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3277 
3278   /* Get info on mapping */
3279   matis = (Mat_IS*)(mat->data);
3280   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3281   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3282 
3283   /* build local CSR graph of subdomains' connectivity */
3284   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3285   xadj[0] = 0;
3286   xadj[1] = PetscMax(n_neighs-1,0);
3287   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3288   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3289 
3290   if (threshold) {
3291     PetscInt xadj_count = 0;
3292     for (i=1;i<n_neighs;i++) {
3293       if (n_shared[i] > threshold) {
3294         adjncy[xadj_count] = neighs[i];
3295         adjncy_wgt[xadj_count] = n_shared[i];
3296         xadj_count++;
3297       }
3298     }
3299     xadj[1] = xadj_count;
3300   } else {
3301     if (xadj[1]) {
3302       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3303       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3304     }
3305   }
3306   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3307   if (use_square) {
3308     for (i=0;i<xadj[1];i++) {
3309       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3310     }
3311   }
3312   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3313 
3314   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3315 
3316   /*
3317     Restrict work on active processes only.
3318   */
3319   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3320   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3321   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3322   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3323   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3324   if (color) {
3325     ierr = PetscFree(xadj);CHKERRQ(ierr);
3326     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3327     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3328   } else {
3329     PetscInt coarsening_ratio;
3330     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3331     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3332     prank = rank;
3333     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3334     /*
3335     for (i=0;i<size;i++) {
3336       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3337     }
3338     */
3339     for (i=0;i<xadj[1];i++) {
3340       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3341     }
3342     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3343     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3344     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3345 
3346     /* Partition */
3347     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3348     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3349     if (use_vwgt) {
3350       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3351       v_wgt[0] = local_size;
3352       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3353     }
3354     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3355     coarsening_ratio = size/n_subdomains;
3356     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3357     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3358     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3359     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3360 
3361     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3362     if (contiguous) {
3363       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3364     } else {
3365       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3366     }
3367     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3368     /* clean up */
3369     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3370     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3371     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3372     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3373   }
3374   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3375 
3376   /* assemble parallel IS for sends */
3377   i = 1;
3378   if (color) i=0;
3379   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3380 
3381   /* get back IS */
3382   *is_sends = ranks_send_to;
3383   PetscFunctionReturn(0);
3384 }
3385 
3386 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3387 
3388 #undef __FUNCT__
3389 #define __FUNCT__ "MatISSubassemble"
3390 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3391 {
3392   Mat                    local_mat;
3393   Mat_IS                 *matis;
3394   IS                     is_sends_internal;
3395   PetscInt               rows,cols,new_local_rows;
3396   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3397   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3398   ISLocalToGlobalMapping l2gmap;
3399   PetscInt*              l2gmap_indices;
3400   const PetscInt*        is_indices;
3401   MatType                new_local_type;
3402   /* buffers */
3403   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3404   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3405   PetscInt               *recv_buffer_idxs_local;
3406   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3407   /* MPI */
3408   MPI_Comm               comm,comm_n;
3409   PetscSubcomm           subcomm;
3410   PetscMPIInt            n_sends,n_recvs,commsize;
3411   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3412   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3413   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3414   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3415   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3416   PetscErrorCode         ierr;
3417 
3418   PetscFunctionBegin;
3419   /* TODO: add missing checks */
3420   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3421   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3422   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3423   PetscValidLogicalCollectiveInt(mat,nis,7);
3424   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3425   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3426   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3427   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3428   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3429   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3430   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3431   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3432     PetscInt mrows,mcols,mnrows,mncols;
3433     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3434     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3435     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3436     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3437     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3438     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3439   }
3440   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3441   PetscValidLogicalCollectiveInt(mat,bs,0);
3442   /* prepare IS for sending if not provided */
3443   if (!is_sends) {
3444     PetscBool pcontig = PETSC_TRUE;
3445     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3446     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3447   } else {
3448     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3449     is_sends_internal = is_sends;
3450   }
3451 
3452   /* get pointer of MATIS data */
3453   matis = (Mat_IS*)mat->data;
3454 
3455   /* get comm */
3456   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3457 
3458   /* compute number of sends */
3459   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3460   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3461 
3462   /* compute number of receives */
3463   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3464   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3465   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3466   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3467   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3468   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3469   ierr = PetscFree(iflags);CHKERRQ(ierr);
3470 
3471   /* restrict comm if requested */
3472   subcomm = 0;
3473   destroy_mat = PETSC_FALSE;
3474   if (restrict_comm) {
3475     PetscMPIInt color,subcommsize;
3476 
3477     color = 0;
3478     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3479     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3480     subcommsize = commsize - subcommsize;
3481     /* check if reuse has been requested */
3482     if (reuse == MAT_REUSE_MATRIX) {
3483       if (*mat_n) {
3484         PetscMPIInt subcommsize2;
3485         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3486         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3487         comm_n = PetscObjectComm((PetscObject)*mat_n);
3488       } else {
3489         comm_n = PETSC_COMM_SELF;
3490       }
3491     } else { /* MAT_INITIAL_MATRIX */
3492       PetscMPIInt rank;
3493 
3494       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3495       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3496       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3497       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3498       comm_n = PetscSubcommChild(subcomm);
3499     }
3500     /* flag to destroy *mat_n if not significative */
3501     if (color) destroy_mat = PETSC_TRUE;
3502   } else {
3503     comm_n = comm;
3504   }
3505 
3506   /* prepare send/receive buffers */
3507   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3508   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3509   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3510   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3511   if (nis) {
3512     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3513   }
3514 
3515   /* Get data from local matrices */
3516   if (!isdense) {
3517     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3518     /* TODO: See below some guidelines on how to prepare the local buffers */
3519     /*
3520        send_buffer_vals should contain the raw values of the local matrix
3521        send_buffer_idxs should contain:
3522        - MatType_PRIVATE type
3523        - PetscInt        size_of_l2gmap
3524        - PetscInt        global_row_indices[size_of_l2gmap]
3525        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3526     */
3527   } else {
3528     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3529     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3530     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3531     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3532     send_buffer_idxs[1] = i;
3533     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3534     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3535     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3536     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3537     for (i=0;i<n_sends;i++) {
3538       ilengths_vals[is_indices[i]] = len*len;
3539       ilengths_idxs[is_indices[i]] = len+2;
3540     }
3541   }
3542   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3543   /* additional is (if any) */
3544   if (nis) {
3545     PetscMPIInt psum;
3546     PetscInt j;
3547     for (j=0,psum=0;j<nis;j++) {
3548       PetscInt plen;
3549       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3550       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3551       psum += len+1; /* indices + lenght */
3552     }
3553     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3554     for (j=0,psum=0;j<nis;j++) {
3555       PetscInt plen;
3556       const PetscInt *is_array_idxs;
3557       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3558       send_buffer_idxs_is[psum] = plen;
3559       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3560       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3561       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3562       psum += plen+1; /* indices + lenght */
3563     }
3564     for (i=0;i<n_sends;i++) {
3565       ilengths_idxs_is[is_indices[i]] = psum;
3566     }
3567     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3568   }
3569 
3570   buf_size_idxs = 0;
3571   buf_size_vals = 0;
3572   buf_size_idxs_is = 0;
3573   for (i=0;i<n_recvs;i++) {
3574     buf_size_idxs += (PetscInt)olengths_idxs[i];
3575     buf_size_vals += (PetscInt)olengths_vals[i];
3576     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3577   }
3578   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3579   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3580   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3581 
3582   /* get new tags for clean communications */
3583   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3584   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3585   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3586 
3587   /* allocate for requests */
3588   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3589   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3590   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3591   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3592   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3593   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3594 
3595   /* communications */
3596   ptr_idxs = recv_buffer_idxs;
3597   ptr_vals = recv_buffer_vals;
3598   ptr_idxs_is = recv_buffer_idxs_is;
3599   for (i=0;i<n_recvs;i++) {
3600     source_dest = onodes[i];
3601     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3602     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3603     ptr_idxs += olengths_idxs[i];
3604     ptr_vals += olengths_vals[i];
3605     if (nis) {
3606       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3607       ptr_idxs_is += olengths_idxs_is[i];
3608     }
3609   }
3610   for (i=0;i<n_sends;i++) {
3611     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3612     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3613     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3614     if (nis) {
3615       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3616     }
3617   }
3618   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3619   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3620 
3621   /* assemble new l2g map */
3622   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3623   ptr_idxs = recv_buffer_idxs;
3624   new_local_rows = 0;
3625   for (i=0;i<n_recvs;i++) {
3626     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3627     ptr_idxs += olengths_idxs[i];
3628   }
3629   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3630   ptr_idxs = recv_buffer_idxs;
3631   new_local_rows = 0;
3632   for (i=0;i<n_recvs;i++) {
3633     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3634     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3635     ptr_idxs += olengths_idxs[i];
3636   }
3637   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3638   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3639   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3640 
3641   /* infer new local matrix type from received local matrices type */
3642   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3643   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3644   if (n_recvs) {
3645     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3646     ptr_idxs = recv_buffer_idxs;
3647     for (i=0;i<n_recvs;i++) {
3648       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3649         new_local_type_private = MATAIJ_PRIVATE;
3650         break;
3651       }
3652       ptr_idxs += olengths_idxs[i];
3653     }
3654     switch (new_local_type_private) {
3655       case MATDENSE_PRIVATE:
3656         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3657           new_local_type = MATSEQAIJ;
3658           bs = 1;
3659         } else { /* if I receive only 1 dense matrix */
3660           new_local_type = MATSEQDENSE;
3661           bs = 1;
3662         }
3663         break;
3664       case MATAIJ_PRIVATE:
3665         new_local_type = MATSEQAIJ;
3666         bs = 1;
3667         break;
3668       case MATBAIJ_PRIVATE:
3669         new_local_type = MATSEQBAIJ;
3670         break;
3671       case MATSBAIJ_PRIVATE:
3672         new_local_type = MATSEQSBAIJ;
3673         break;
3674       default:
3675         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3676         break;
3677     }
3678   } else { /* by default, new_local_type is seqdense */
3679     new_local_type = MATSEQDENSE;
3680     bs = 1;
3681   }
3682 
3683   /* create MATIS object if needed */
3684   if (reuse == MAT_INITIAL_MATRIX) {
3685     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3686     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3687   } else {
3688     /* it also destroys the local matrices */
3689     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3690   }
3691   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3692   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3693 
3694   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3695 
3696   /* Global to local map of received indices */
3697   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3698   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3699   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3700 
3701   /* restore attributes -> type of incoming data and its size */
3702   buf_size_idxs = 0;
3703   for (i=0;i<n_recvs;i++) {
3704     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3705     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3706     buf_size_idxs += (PetscInt)olengths_idxs[i];
3707   }
3708   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3709 
3710   /* set preallocation */
3711   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3712   if (!newisdense) {
3713     PetscInt *new_local_nnz=0;
3714 
3715     ptr_vals = recv_buffer_vals;
3716     ptr_idxs = recv_buffer_idxs_local;
3717     if (n_recvs) {
3718       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3719     }
3720     for (i=0;i<n_recvs;i++) {
3721       PetscInt j;
3722       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3723         for (j=0;j<*(ptr_idxs+1);j++) {
3724           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3725         }
3726       } else {
3727         /* TODO */
3728       }
3729       ptr_idxs += olengths_idxs[i];
3730     }
3731     if (new_local_nnz) {
3732       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3733       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3734       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3735       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3736       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3737       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3738     } else {
3739       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3740     }
3741     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3742   } else {
3743     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3744   }
3745 
3746   /* set values */
3747   ptr_vals = recv_buffer_vals;
3748   ptr_idxs = recv_buffer_idxs_local;
3749   for (i=0;i<n_recvs;i++) {
3750     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3751       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3752       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3753       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3754       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3755       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3756     } else {
3757       /* TODO */
3758     }
3759     ptr_idxs += olengths_idxs[i];
3760     ptr_vals += olengths_vals[i];
3761   }
3762   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3763   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3764   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3765   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3766   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3767   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3768 
3769 #if 0
3770   if (!restrict_comm) { /* check */
3771     Vec       lvec,rvec;
3772     PetscReal infty_error;
3773 
3774     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3775     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3776     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3777     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3778     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3779     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3780     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3781     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3782     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3783   }
3784 #endif
3785 
3786   /* assemble new additional is (if any) */
3787   if (nis) {
3788     PetscInt **temp_idxs,*count_is,j,psum;
3789 
3790     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3791     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3792     ptr_idxs = recv_buffer_idxs_is;
3793     psum = 0;
3794     for (i=0;i<n_recvs;i++) {
3795       for (j=0;j<nis;j++) {
3796         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3797         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3798         psum += plen;
3799         ptr_idxs += plen+1; /* shift pointer to received data */
3800       }
3801     }
3802     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3803     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3804     for (i=1;i<nis;i++) {
3805       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3806     }
3807     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3808     ptr_idxs = recv_buffer_idxs_is;
3809     for (i=0;i<n_recvs;i++) {
3810       for (j=0;j<nis;j++) {
3811         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3812         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3813         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3814         ptr_idxs += plen+1; /* shift pointer to received data */
3815       }
3816     }
3817     for (i=0;i<nis;i++) {
3818       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3819       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3820       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3821     }
3822     ierr = PetscFree(count_is);CHKERRQ(ierr);
3823     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3824     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3825   }
3826   /* free workspace */
3827   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3828   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3829   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3830   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3831   if (isdense) {
3832     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3833     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3834   } else {
3835     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3836   }
3837   if (nis) {
3838     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3839     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3840   }
3841   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3842   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3843   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3844   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3845   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3846   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3847   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3848   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3849   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3850   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3851   ierr = PetscFree(onodes);CHKERRQ(ierr);
3852   if (nis) {
3853     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3854     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3855     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3856   }
3857   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3858   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3859     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3860     for (i=0;i<nis;i++) {
3861       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3862     }
3863   }
3864   PetscFunctionReturn(0);
3865 }
3866 
3867 /* temporary hack into ksp private data structure */
3868 #include <petsc/private/kspimpl.h>
3869 
3870 #undef __FUNCT__
3871 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3872 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3873 {
3874   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3875   PC_IS                  *pcis = (PC_IS*)pc->data;
3876   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3877   MatNullSpace           CoarseNullSpace=NULL;
3878   ISLocalToGlobalMapping coarse_islg;
3879   IS                     coarse_is,*isarray;
3880   PetscInt               i,im_active=-1,active_procs=-1;
3881   PetscInt               nis,nisdofs,nisneu;
3882   PC                     pc_temp;
3883   PCType                 coarse_pc_type;
3884   KSPType                coarse_ksp_type;
3885   PetscBool              multilevel_requested,multilevel_allowed;
3886   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3887   Mat                    t_coarse_mat_is;
3888   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3889   PetscMPIInt            all_procs;
3890   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3891   PetscBool              compute_vecs = PETSC_FALSE;
3892   PetscScalar            *array;
3893   PetscErrorCode         ierr;
3894 
3895   PetscFunctionBegin;
3896   /* Assign global numbering to coarse dofs */
3897   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3898     PetscInt ocoarse_size;
3899     compute_vecs = PETSC_TRUE;
3900     ocoarse_size = pcbddc->coarse_size;
3901     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3902     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3903     /* see if we can avoid some work */
3904     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3905       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3906         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3907         coarse_reuse = PETSC_FALSE;
3908       } else { /* we can safely reuse already computed coarse matrix */
3909         coarse_reuse = PETSC_TRUE;
3910       }
3911     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3912       coarse_reuse = PETSC_FALSE;
3913     }
3914     /* reset any subassembling information */
3915     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3916     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3917   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3918     coarse_reuse = PETSC_TRUE;
3919   }
3920 
3921   /* count "active" (i.e. with positive local size) and "void" processes */
3922   im_active = !!(pcis->n);
3923   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3924   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3925   void_procs = all_procs-active_procs;
3926   csin_type_simple = PETSC_TRUE;
3927   redist = PETSC_FALSE;
3928   if (pcbddc->current_level && void_procs) {
3929     csin_ml = PETSC_TRUE;
3930     ncoarse_ml = void_procs;
3931     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3932     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3933       csin_ds = PETSC_TRUE;
3934       ncoarse_ds = pcbddc->redistribute_coarse;
3935       redist = PETSC_TRUE;
3936     } else {
3937       csin_ds = PETSC_TRUE;
3938       ncoarse_ds = active_procs;
3939       redist = PETSC_TRUE;
3940     }
3941   } else {
3942     csin_ml = PETSC_FALSE;
3943     ncoarse_ml = all_procs;
3944     if (void_procs) {
3945       csin_ds = PETSC_TRUE;
3946       ncoarse_ds = void_procs;
3947       csin_type_simple = PETSC_FALSE;
3948     } else {
3949       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3950         csin_ds = PETSC_TRUE;
3951         ncoarse_ds = pcbddc->redistribute_coarse;
3952         redist = PETSC_TRUE;
3953       } else {
3954         csin_ds = PETSC_FALSE;
3955         ncoarse_ds = all_procs;
3956       }
3957     }
3958   }
3959 
3960   /*
3961     test if we can go multilevel: three conditions must be satisfied:
3962     - we have not exceeded the number of levels requested
3963     - we can actually subassemble the active processes
3964     - we can find a suitable number of MPI processes where we can place the subassembled problem
3965   */
3966   multilevel_allowed = PETSC_FALSE;
3967   multilevel_requested = PETSC_FALSE;
3968   if (pcbddc->current_level < pcbddc->max_levels) {
3969     multilevel_requested = PETSC_TRUE;
3970     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3971       multilevel_allowed = PETSC_FALSE;
3972     } else {
3973       multilevel_allowed = PETSC_TRUE;
3974     }
3975   }
3976   /* determine number of process partecipating to coarse solver */
3977   if (multilevel_allowed) {
3978     ncoarse = ncoarse_ml;
3979     csin = csin_ml;
3980     redist = PETSC_FALSE;
3981   } else {
3982     ncoarse = ncoarse_ds;
3983     csin = csin_ds;
3984   }
3985 
3986   /* creates temporary l2gmap and IS for coarse indexes */
3987   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3988   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3989 
3990   /* creates temporary MATIS object for coarse matrix */
3991   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3992   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3993   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3994   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3995 #if 0
3996   {
3997     PetscViewer viewer;
3998     char filename[256];
3999     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4000     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4001     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4002     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4003     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4004   }
4005 #endif
4006   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4007   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4008   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4009   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4010   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4011 
4012   /* compute dofs splitting and neumann boundaries for coarse dofs */
4013   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4014     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4015     const PetscInt         *idxs;
4016     ISLocalToGlobalMapping tmap;
4017 
4018     /* create map between primal indices (in local representative ordering) and local primal numbering */
4019     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4020     /* allocate space for temporary storage */
4021     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4022     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4023     /* allocate for IS array */
4024     nisdofs = pcbddc->n_ISForDofsLocal;
4025     nisneu = !!pcbddc->NeumannBoundariesLocal;
4026     nis = nisdofs + nisneu;
4027     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4028     /* dofs splitting */
4029     for (i=0;i<nisdofs;i++) {
4030       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4031       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4032       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4033       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4034       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4035       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4036       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4037       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4038     }
4039     /* neumann boundaries */
4040     if (pcbddc->NeumannBoundariesLocal) {
4041       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4042       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4043       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4044       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4045       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4046       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4047       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4048       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4049     }
4050     /* free memory */
4051     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4052     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4053     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4054   } else {
4055     nis = 0;
4056     nisdofs = 0;
4057     nisneu = 0;
4058     isarray = NULL;
4059   }
4060   /* destroy no longer needed map */
4061   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4062 
4063   /* restrict on coarse candidates (if needed) */
4064   coarse_mat_is = NULL;
4065   if (csin) {
4066     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4067       if (redist) {
4068         PetscMPIInt rank;
4069         PetscInt    spc,n_spc_p1,dest[1],destsize;
4070 
4071         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4072         spc = active_procs/ncoarse;
4073         n_spc_p1 = active_procs%ncoarse;
4074         if (im_active) {
4075           destsize = 1;
4076           if (rank > n_spc_p1*(spc+1)-1) {
4077             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4078           } else {
4079             dest[0] = rank/(spc+1);
4080           }
4081         } else {
4082           destsize = 0;
4083         }
4084         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4085       } else if (csin_type_simple) {
4086         PetscMPIInt rank;
4087         PetscInt    issize,isidx;
4088 
4089         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4090         if (im_active) {
4091           issize = 1;
4092           isidx = (PetscInt)rank;
4093         } else {
4094           issize = 0;
4095           isidx = -1;
4096         }
4097         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4098       } else { /* get a suitable subassembling pattern from MATIS code */
4099         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4100       }
4101 
4102       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4103       if (!redist || ncoarse <= void_procs) {
4104         PetscInt ncoarse_cand,tissize,*nisindices;
4105         PetscInt *coarse_candidates;
4106         const PetscInt* tisindices;
4107 
4108         /* get coarse candidates' ranks in pc communicator */
4109         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4110         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4111         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4112           if (!coarse_candidates[i]) {
4113             coarse_candidates[ncoarse_cand++]=i;
4114           }
4115         }
4116         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4117 
4118 
4119         if (pcbddc->dbg_flag) {
4120           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4121           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4122           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4123           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4124           for (i=0;i<ncoarse_cand;i++) {
4125             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4126           }
4127           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4128           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4129         }
4130         /* shift the pattern on coarse candidates */
4131         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4132         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4133         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4134         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4135         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4136         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4137         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4138       }
4139       if (pcbddc->dbg_flag) {
4140         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4141         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4142         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4143         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4144       }
4145     }
4146     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4147     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4148   } else {
4149     if (pcbddc->dbg_flag) {
4150       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4151       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4152       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4153     }
4154     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4155     coarse_mat_is = t_coarse_mat_is;
4156   }
4157 
4158   /* create local to global scatters for coarse problem */
4159   if (compute_vecs) {
4160     PetscInt lrows;
4161     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4162     if (coarse_mat_is) {
4163       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4164     } else {
4165       lrows = 0;
4166     }
4167     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4168     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4169     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4170     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4171     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4172   }
4173   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4174   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4175 
4176   /* set defaults for coarse KSP and PC */
4177   if (multilevel_allowed) {
4178     coarse_ksp_type = KSPRICHARDSON;
4179     coarse_pc_type = PCBDDC;
4180   } else {
4181     coarse_ksp_type = KSPPREONLY;
4182     coarse_pc_type = PCREDUNDANT;
4183   }
4184 
4185   /* print some info if requested */
4186   if (pcbddc->dbg_flag) {
4187     if (!multilevel_allowed) {
4188       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4189       if (multilevel_requested) {
4190         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4191       } else if (pcbddc->max_levels) {
4192         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4193       }
4194       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4195     }
4196   }
4197 
4198   /* create the coarse KSP object only once with defaults */
4199   if (coarse_mat_is) {
4200     MatReuse coarse_mat_reuse;
4201     PetscViewer dbg_viewer = NULL;
4202     if (pcbddc->dbg_flag) {
4203       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4204       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4205     }
4206     if (!pcbddc->coarse_ksp) {
4207       char prefix[256],str_level[16];
4208       size_t len;
4209       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4210       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4211       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4212       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4213       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4214       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4215       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4216       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4217       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4218       /* prefix */
4219       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4220       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4221       if (!pcbddc->current_level) {
4222         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4223         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4224       } else {
4225         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4226         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4227         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4228         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4229         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4230         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4231       }
4232       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4233       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4234       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4235       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4236       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4237       /* allow user customization */
4238       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4239     }
4240     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4241     if (nisdofs) {
4242       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4243       for (i=0;i<nisdofs;i++) {
4244         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4245       }
4246     }
4247     if (nisneu) {
4248       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4249       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4250     }
4251 
4252     /* get some info after set from options */
4253     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4254     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4255     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4256     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4257     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4258       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4259       isbddc = PETSC_FALSE;
4260     }
4261     if (isredundant) {
4262       KSP inner_ksp;
4263       PC inner_pc;
4264       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4265       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4266       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4267     }
4268 
4269     /* assemble coarse matrix */
4270     if (coarse_reuse) {
4271       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4272       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4273       coarse_mat_reuse = MAT_REUSE_MATRIX;
4274     } else {
4275       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4276     }
4277     if (isbddc || isnn) {
4278       if (pcbddc->coarsening_ratio > 1) {
4279         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4280           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4281           if (pcbddc->dbg_flag) {
4282             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4283             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4284             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4285             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4286           }
4287         }
4288         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4289       } else {
4290         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4291         coarse_mat = coarse_mat_is;
4292       }
4293     } else {
4294       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4295     }
4296     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4297 
4298     /* propagate symmetry info to coarse matrix */
4299     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4300     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4301 
4302     /* set operators */
4303     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4304     if (pcbddc->dbg_flag) {
4305       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4306     }
4307   } else { /* processes non partecipating to coarse solver (if any) */
4308     coarse_mat = 0;
4309   }
4310   ierr = PetscFree(isarray);CHKERRQ(ierr);
4311 #if 0
4312   {
4313     PetscViewer viewer;
4314     char filename[256];
4315     sprintf(filename,"coarse_mat.m");
4316     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4317     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4318     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4319     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4320   }
4321 #endif
4322 
4323   /* Compute coarse null space (special handling by BDDC only) */
4324   if (pcbddc->NullSpace) {
4325     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4326   }
4327 
4328   if (pcbddc->coarse_ksp) {
4329     Vec crhs,csol;
4330     PetscBool ispreonly;
4331     if (CoarseNullSpace) {
4332       if (isbddc) {
4333         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4334       } else {
4335         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4336       }
4337     }
4338     /* setup coarse ksp */
4339     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4340     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4341     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4342     /* hack */
4343     if (!csol) {
4344       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4345     }
4346     if (!crhs) {
4347       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4348     }
4349     /* Check coarse problem if in debug mode or if solving with an iterative method */
4350     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4351     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4352       KSP       check_ksp;
4353       KSPType   check_ksp_type;
4354       PC        check_pc;
4355       Vec       check_vec,coarse_vec;
4356       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4357       PetscInt  its;
4358       PetscBool compute_eigs;
4359       PetscReal *eigs_r,*eigs_c;
4360       PetscInt  neigs;
4361       const char *prefix;
4362 
4363       /* Create ksp object suitable for estimation of extreme eigenvalues */
4364       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4365       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4366       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4367       if (ispreonly) {
4368         check_ksp_type = KSPPREONLY;
4369         compute_eigs = PETSC_FALSE;
4370       } else {
4371         check_ksp_type = KSPGMRES;
4372         compute_eigs = PETSC_TRUE;
4373       }
4374       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4375       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4376       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4377       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4378       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4379       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4380       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4381       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4382       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4383       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4384       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4385       /* create random vec */
4386       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4387       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4388       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4389       if (CoarseNullSpace) {
4390         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4391       }
4392       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4393       /* solve coarse problem */
4394       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4395       if (CoarseNullSpace) {
4396         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4397       }
4398       /* set eigenvalue estimation if preonly has not been requested */
4399       if (compute_eigs) {
4400         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4401         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4402         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4403         lambda_max = eigs_r[neigs-1];
4404         lambda_min = eigs_r[0];
4405         if (pcbddc->use_coarse_estimates) {
4406           if (lambda_max>lambda_min) {
4407             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4408             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4409           }
4410         }
4411       }
4412 
4413       /* check coarse problem residual error */
4414       if (pcbddc->dbg_flag) {
4415         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4416         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4417         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4418         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4419         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4420         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4421         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4422         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4423         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4424         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4425         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4426         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4427         if (compute_eigs) {
4428           PetscReal lambda_max_s,lambda_min_s;
4429           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4430           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4431           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4432           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4433           for (i=0;i<neigs;i++) {
4434             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4435           }
4436         }
4437         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4438         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4439       }
4440       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4441       if (compute_eigs) {
4442         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4443         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4444       }
4445     }
4446   }
4447   /* print additional info */
4448   if (pcbddc->dbg_flag) {
4449     /* waits until all processes reaches this point */
4450     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4451     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4452     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4453   }
4454 
4455   /* free memory */
4456   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4457   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4458   PetscFunctionReturn(0);
4459 }
4460 
4461 #undef __FUNCT__
4462 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4463 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4464 {
4465   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4466   PC_IS*         pcis = (PC_IS*)pc->data;
4467   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4468   PetscInt       i,local_size,coarse_size=0;
4469   PetscInt       *local_primal_indices=NULL;
4470   PetscInt       *t_local_primal_indices=NULL;
4471   PetscErrorCode ierr;
4472 
4473   PetscFunctionBegin;
4474   /* Compute global number of coarse dofs */
4475   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4476     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4477   }
4478   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr);
4479   ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr);
4480   local_size = 0;
4481   for (i=0;i<pcbddc->local_primal_size_cc;i++) {
4482     PetscInt j;
4483     for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j;
4484   }
4485   ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr);
4486   if (local_size != pcbddc->local_primal_size) {
4487     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4488   }
4489 
4490   /* check numbering */
4491   if (pcbddc->dbg_flag) {
4492     PetscScalar coarsesum,*array;
4493     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4494 
4495     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4496     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4497     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4498     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4499     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4500     for (i=0;i<pcbddc->local_primal_size;i++) {
4501       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4502     }
4503     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4504     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4505     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4506     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4507     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4508     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4509     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4510     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4511     for (i=0;i<pcis->n;i++) {
4512       if (array[i] == 1.0) {
4513         set_error = PETSC_TRUE;
4514         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4515       }
4516     }
4517     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4518     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4519     for (i=0;i<pcis->n;i++) {
4520       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4521     }
4522     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4523     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4524     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4525     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4526     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4527     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4528     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4529       PetscInt *gidxs;
4530 
4531       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4532       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4533       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4534       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4535       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4536       for (i=0;i<pcbddc->local_primal_size;i++) {
4537         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4538       }
4539       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4540       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4541     }
4542     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4543     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4544   }
4545   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4546   /* get back data */
4547   *coarse_size_n = coarse_size;
4548   *local_primal_indices_n = local_primal_indices;
4549   PetscFunctionReturn(0);
4550 }
4551 
4552 #undef __FUNCT__
4553 #define __FUNCT__ "PCBDDCGlobalToLocal"
4554 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4555 {
4556   IS             localis_t;
4557   PetscInt       i,lsize,*idxs,n;
4558   PetscScalar    *vals;
4559   PetscErrorCode ierr;
4560 
4561   PetscFunctionBegin;
4562   /* get indices in local ordering exploiting local to global map */
4563   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4564   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4565   for (i=0;i<lsize;i++) vals[i] = 1.0;
4566   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4567   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4568   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4569   if (idxs) { /* multilevel guard */
4570     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4571   }
4572   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4573   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4574   ierr = PetscFree(vals);CHKERRQ(ierr);
4575   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4576   /* now compute set in local ordering */
4577   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4578   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4579   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4580   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4581   for (i=0,lsize=0;i<n;i++) {
4582     if (PetscRealPart(vals[i]) > 0.5) {
4583       lsize++;
4584     }
4585   }
4586   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4587   for (i=0,lsize=0;i<n;i++) {
4588     if (PetscRealPart(vals[i]) > 0.5) {
4589       idxs[lsize++] = i;
4590     }
4591   }
4592   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4593   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4594   *localis = localis_t;
4595   PetscFunctionReturn(0);
4596 }
4597 
4598 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4599 #undef __FUNCT__
4600 #define __FUNCT__ "PCBDDCMatMult_Private"
4601 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4602 {
4603   PCBDDCChange_ctx change_ctx;
4604   PetscErrorCode   ierr;
4605 
4606   PetscFunctionBegin;
4607   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4608   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4609   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4610   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4611   PetscFunctionReturn(0);
4612 }
4613 
4614 #undef __FUNCT__
4615 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4616 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4617 {
4618   PCBDDCChange_ctx change_ctx;
4619   PetscErrorCode   ierr;
4620 
4621   PetscFunctionBegin;
4622   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4623   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4624   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4625   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4626   PetscFunctionReturn(0);
4627 }
4628 
4629 #undef __FUNCT__
4630 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4631 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4632 {
4633   PC_IS               *pcis=(PC_IS*)pc->data;
4634   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4635   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4636   Mat                 S_j;
4637   PetscInt            *used_xadj,*used_adjncy;
4638   PetscBool           free_used_adj;
4639   PetscErrorCode      ierr;
4640 
4641   PetscFunctionBegin;
4642   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4643   free_used_adj = PETSC_FALSE;
4644   if (pcbddc->sub_schurs_layers == -1) {
4645     used_xadj = NULL;
4646     used_adjncy = NULL;
4647   } else {
4648     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4649       used_xadj = pcbddc->mat_graph->xadj;
4650       used_adjncy = pcbddc->mat_graph->adjncy;
4651     } else if (pcbddc->computed_rowadj) {
4652       used_xadj = pcbddc->mat_graph->xadj;
4653       used_adjncy = pcbddc->mat_graph->adjncy;
4654     } else {
4655       PetscBool      flg_row=PETSC_FALSE;
4656       const PetscInt *xadj,*adjncy;
4657       PetscInt       nvtxs;
4658 
4659       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4660       if (flg_row) {
4661         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4662         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4663         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4664         free_used_adj = PETSC_TRUE;
4665       } else {
4666         pcbddc->sub_schurs_layers = -1;
4667         used_xadj = NULL;
4668         used_adjncy = NULL;
4669       }
4670       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4671     }
4672   }
4673 
4674   /* setup sub_schurs data */
4675   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4676   if (!sub_schurs->use_mumps) {
4677     /* pcbddc->ksp_D up to date only if not using MUMPS */
4678     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4679     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4680   } else {
4681     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4682   }
4683   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4684 
4685   /* free adjacency */
4686   if (free_used_adj) {
4687     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4688   }
4689   PetscFunctionReturn(0);
4690 }
4691 
4692 #undef __FUNCT__
4693 #define __FUNCT__ "PCBDDCInitSubSchurs"
4694 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4695 {
4696   PC_IS               *pcis=(PC_IS*)pc->data;
4697   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4698   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4699   PCBDDCGraph         graph;
4700   PetscErrorCode      ierr;
4701 
4702   PetscFunctionBegin;
4703   /* attach interface graph for determining subsets */
4704   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4705     IS verticesIS;
4706 
4707     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4708     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4709     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4710     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4711     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4712     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4713 /*
4714     if (pcbddc->dbg_flag) {
4715       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4716     }
4717 */
4718   } else {
4719     graph = pcbddc->mat_graph;
4720   }
4721 
4722   /* sub_schurs init */
4723   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4724 
4725   /* free graph struct */
4726   if (pcbddc->sub_schurs_rebuild) {
4727     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4728   }
4729   PetscFunctionReturn(0);
4730 }
4731